├── .github └── workflows │ └── weread.yml ├── README.md ├── WechatIMG20.jpg ├── requirements.txt └── weread.py /.github/workflows/weread.yml: -------------------------------------------------------------------------------- 1 | name: weread sync 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 0 * * *" 7 | jobs: 8 | sync: 9 | name: Sync 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: 3.9 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install -r requirements.txt 23 | - name: weread sync 24 | run: | 25 | python weread.py "${{secrets.WEREAD_COOKIE}}" "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_ID}}" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 将微信读书划线和笔记同步到Notion 2 | 3 | 4 | 本项目通过Github Action每天定时同步微信读书划线到Notion。 5 | 6 | 预览效果:https://book.malinkang.com 7 | 8 | > [!WARNING] 9 | > 请不要在Page里面添加自己的笔记,有新的笔记的时候会删除原笔记重新添加。 10 | 11 | 12 | ## 使用 13 | 14 | 1. star本项目 15 | 2. fork这个工程 16 | 3. 获取微信读书的Cookie 17 | * 浏览器打开 https://weread.qq.com/ 18 | * 微信扫码登录确认,提示没有权限忽略即可 19 | * 按F12进入开发者模式,依次点 Network -> Doc -> Headers-> cookie。复制 Cookie 字符串; 20 | 4. 获取NotionToken 21 | * 浏览器打开https://www.notion.so/my-integrations 22 | * 点击New integration 输入name提交 23 | * 点击show,然后copy 24 | 5. 复制[这个Notion模板](https://malinkang.notion.site/e27842548a6d4a81bc7aea736d90d6dd?v=b255858d3eaa409f97f1ecb32a14a5b6&pvs=4),删掉所有的数据,并点击右上角设置,Connections添加你创建的Integration。 25 | 26 | 6. 获取NotionDatabaseID 27 | * 打开Notion数据库,点击右上角的Share,然后点击Copy link 28 | * 获取链接后比如 https://www.notion.so/malinkang/1b78f0fd0d03484caa00154285ffec0c?v=7ed7e3fbe69043a28d2847e76f075d99&pvs=4 中间的1b78f0fd0d03484caa00154285ffec0c就是DatabaseID 29 | 7. 
# --- README.md (continued; translated from the original Chinese) ------------
# 7. Add the following variables to your GitHub repository secrets:
#    * Open your fork and go to
#      Settings -> Secrets and variables -> New repository secret
#    * Add these secrets:
#        * WEREAD_COOKIE
#        * NOTION_TOKEN
#        * NOTION_DATABASE_ID
#
# ## WeChat group
#
# ![image](WechatIMG20.jpg)
#
# --- /WechatIMG20.jpg --------------------------------------------------------
# https://raw.githubusercontent.com/GitHub01409991/weread2notion/6e7060d364d864a35ef4a2d40afd5de9d069a4ff/WechatIMG20.jpg
#
# --- /requirements.txt -------------------------------------------------------
# requests
# notion-client
#
# --- /weread.py --------------------------------------------------------------
"""Sync WeRead (微信读书) highlights and notes into a Notion database.

Usage::

    python weread.py WEREAD_COOKIE NOTION_TOKEN NOTION_DATABASE_ID

The third-party packages (``requests`` and ``notion-client``) are imported
inside the functions that need them, so the pure formatting helpers in this
module stay importable without those packages installed.
"""

import argparse
import hashlib
import json
import logging
import re
import time
from datetime import datetime, timedelta, timezone
from http.cookies import SimpleCookie

WEREAD_URL = "https://weread.qq.com/"
WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/user/notebooks"
WEREAD_BOOKMARKLIST_URL = "https://i.weread.qq.com/book/bookmarklist"
WEREAD_CHAPTER_INFO = "https://i.weread.qq.com/book/chapterInfos"
WEREAD_READ_INFO_URL = "https://i.weread.qq.com/book/readinfo"
WEREAD_REVIEW_LIST_URL = "https://i.weread.qq.com/review/list"
WEREAD_BOOK_INFO = "https://i.weread.qq.com/book/info"

# Notion request limits: one rich-text object holds at most 2000 characters
# and one append request accepts at most 100 child blocks.
NOTION_TEXT_LIMIT = 2000
NOTION_CHILDREN_LIMIT = 100

# Finished dates are sent to Notion as Asia/Shanghai wall-clock time.
# China observes no DST, so a fixed UTC+8 offset is exact.
_SHANGHAI_TZ = timezone(timedelta(hours=8))

# Highlight colour code (WeRead "colorStyle") -> Notion block colour.
_CALLOUT_COLORS = {1: "red", 2: "purple", 3: "blue", 4: "green", 5: "yellow"}

# Shared state, populated by main() before any network helper is called.
session = None  # requests.Session carrying the WeRead cookies
client = None  # notion_client.Client
database_id = None  # target Notion database id


def parse_cookie_string(cookie_string):
    """Convert a raw ``Cookie`` header string into a requests cookie jar.

    The jar is built once from all parsed cookies; an empty cookie string
    yields an empty jar instead of ``None``.
    """
    from requests.utils import cookiejar_from_dict

    cookie = SimpleCookie()
    cookie.load(cookie_string)
    cookies_dict = {key: morsel.value for key, morsel in cookie.items()}
    return cookiejar_from_dict(cookies_dict, cookiejar=None, overwrite=True)


def _range_start(item):
    """Return the integer start offset of ``item["range"]``, or 0 if absent."""
    start = (item.get("range") or "").split("-")[0]
    return int(start) if start else 0


def _bookmark_sort_key(item):
    """Order highlights/notes by chapter, then by position inside the chapter."""
    return (item.get("chapterUid", 1), _range_start(item))


def get_bookmark_list(bookId):
    """Fetch the user's highlights for a book.

    Returns the highlights sorted by chapter and position, or ``None`` when
    the request fails.
    """
    params = dict(bookId=bookId)
    r = session.get(WEREAD_BOOKMARKLIST_URL, params=params)
    if r.ok:
        updated = r.json().get("updated") or []
        # Return the sorted list (the sort result used to be discarded).
        return sorted(updated, key=_bookmark_sort_key)
    return None


def get_read_info(bookId):
    """Fetch reading statistics for a book; ``None`` when the request fails."""
    params = dict(bookId=bookId, readingDetail=1,
                  readingBookIndex=1, finishedDate=1)
    r = session.get(WEREAD_READ_INFO_URL, params=params)
    if r.ok:
        return r.json()
    return None


def get_bookinfo(bookId):
    """Fetch book details.

    Returns ``(isbn, rating)`` where rating is ``newRating / 1000``; falls
    back to ``("", 0)`` when the request fails or a field is missing.
    """
    params = dict(bookId=bookId)
    r = session.get(WEREAD_BOOK_INFO, params=params)
    if r.ok:
        data = r.json()
        isbn = data.get("isbn", "")
        newRating = data.get("newRating", 0) / 1000
        return (isbn, newRating)
    print(f"get {bookId} book info failed")
    return ("", 0)


def get_review_list(bookId):
    """Fetch the user's notes for a book.

    Returns ``(summary, reviews)``: ``summary`` holds the raw entries whose
    review type is 4 (book-level comments); ``reviews`` holds the review
    dicts of type 1, each with its ``content`` copied to ``markText`` so it
    can be rendered like a highlight. Both are empty when the request fails.
    """
    params = dict(bookId=bookId, listType=11, mine=1, syncKey=0)
    r = session.get(WEREAD_REVIEW_LIST_URL, params=params)
    if not r.ok:
        print(f"get {bookId} review list failed")
        return [], []
    entries = r.json().get("reviews") or []
    summary = [x for x in entries if x.get("review").get("type") == 4]
    notes = [x.get("review") for x in entries
             if x.get("review").get("type") == 1]
    reviews = [{**x, "markText": x.get("content", "")} for x in notes]
    return summary, reviews


def get_table_of_contents():
    """Build a Notion table-of-contents block."""
    return {
        "type": "table_of_contents",
        "table_of_contents": {
            "color": "default"
        }
    }


def get_heading(level, content):
    """Build a Notion heading block; levels other than 1 and 2 map to heading_3."""
    heading = {1: "heading_1", 2: "heading_2"}.get(level, "heading_3")
    return {
        "type": heading,
        heading: {
            "rich_text": [{
                "type": "text",
                "text": {
                    "content": content,
                }
            }],
            "color": "default",
            "is_toggleable": False
        }
    }


def get_quote(content):
    """Build a Notion quote block."""
    return {
        "type": "quote",
        "quote": {
            "rich_text": [{
                "type": "text",
                "text": {
                    "content": content
                },
            }],
            "color": "default"
        }
    }


def get_callout(content, style, colorStyle, reviewId):
    """Build a Notion callout block for one highlight or note chunk."""
    # Emoji depends on the highlight style:
    # straight underline = 0, background colour = 1, wavy line = 2.
    emoji = "🌟"
    if style == 0:
        emoji = "💡"
    elif style == 1:
        emoji = "⭐"
    # A non-null reviewId means the entry is a note rather than a highlight.
    if reviewId is not None:
        emoji = "✍️"
    # Text colour follows the highlight colour.
    color = _CALLOUT_COLORS.get(colorStyle, "default")
    return {
        "type": "callout",
        "callout": {
            "rich_text": [{
                "type": "text",
                "text": {
                    "content": content,
                }
            }],
            "icon": {
                "emoji": emoji
            },
            "color": color
        }
    }


def check(bookId):
    """Delete any page already stored for ``bookId`` so it can be re-inserted."""
    time.sleep(0.3)
    query_filter = {
        "property": "BookId",
        "rich_text": {
            "equals": bookId
        }
    }
    response = client.databases.query(
        database_id=database_id, filter=query_filter)
    for result in response["results"]:
        time.sleep(0.3)
        client.blocks.delete(block_id=result["id"])


def get_chapter_info(bookId):
    """Fetch chapter metadata, keyed by ``chapterUid``; ``None`` if unavailable."""
    body = {
        'bookIds': [bookId],
        'synckeys': [0],
        'teenmode': 0
    }
    r = session.post(WEREAD_CHAPTER_INFO, json=body)
    if not r.ok:
        return None
    payload = r.json()  # parse the response body once
    if ("data" in payload and len(payload["data"]) == 1
            and "updated" in payload["data"][0]):
        update = payload["data"][0]["updated"]
        return {item["chapterUid"]: item for item in update}
    return None


def format_reading_time(seconds):
    """Format a duration in seconds as ``"<h>时<m>分"``, omitting zero parts."""
    format_time = ""
    hour = seconds // 3600
    if hour > 0:
        format_time += f"{hour}时"
    minutes = seconds % 3600 // 60
    if minutes > 0:
        format_time += f"{minutes}分"
    return format_time


def format_finished_date(timestamp):
    """Render a Unix timestamp as Asia/Shanghai wall-clock time.

    The value is sent to Notion together with ``time_zone="Asia/Shanghai"``,
    so it must be local Shanghai time rather than UTC.
    """
    moment = datetime.fromtimestamp(timestamp, tz=_SHANGHAI_TZ)
    return moment.strftime("%Y-%m-%d %H:%M:%S")


def insert_to_notion(bookName, bookId, cover, sort, author, isbn, rating, categories):
    """Create the Notion page for a book and return the new page id."""
    time.sleep(0.3)
    parent = {
        "database_id": database_id,
        "type": "database_id"
    }
    properties = {
        "BookName": {"title": [{"type": "text", "text": {"content": bookName}}]},
        "BookId": {"rich_text": [{"type": "text", "text": {"content": bookId}}]},
        "ISBN": {"rich_text": [{"type": "text", "text": {"content": isbn}}]},
        "URL": {"url": f"https://weread.qq.com/web/reader/{calculate_book_str_id(bookId)}"},
        "Author": {"rich_text": [{"type": "text", "text": {"content": author}}]},
        "Sort": {"number": sort},
        "Rating": {"number": rating},
    }
    if cover:
        # Without a cover URL the property is simply left empty.
        properties["Cover"] = {"files": [
            {"type": "external", "name": "Cover", "external": {"url": cover}}]}
    if categories is not None:
        multi_select = [{"name": x} for x in categories]
        properties["Categories"] = {"multi_select": multi_select}
    read_info = get_read_info(bookId=bookId)
    if read_info is not None:
        markedStatus = read_info.get("markedStatus", 0)
        readingTime = read_info.get("readingTime", 0)
        readingProgress = read_info.get("readingProgress", 0)
        properties["Status"] = {"select": {
            "name": "读完" if markedStatus == 4 else "在读"}}
        properties["ReadingTime"] = {"rich_text": [
            {"type": "text", "text": {"content": format_reading_time(readingTime)}}]}
        properties["Progress"] = {"number": readingProgress}
        finished = read_info.get("finishedDate")
        if finished is not None:
            properties["Date"] = {"date": {
                "start": format_finished_date(finished),
                "time_zone": "Asia/Shanghai"}}

    if cover and cover.startswith("http"):
        icon = {
            "type": "external",
            "external": {
                "url": cover
            }
        }
    else:
        icon = {"type": "emoji", "emoji": "📚"}
    # Children are appended separately: one request accepts at most 100 blocks.
    response = client.pages.create(
        parent=parent, icon=icon, properties=properties)
    return response["id"]


def add_children(id, children):
    """Append ``children`` to block ``id`` in batches of at most 100.

    Returns the created blocks in order, or ``None`` when Notion returned a
    different number of blocks than were sent. No request is made for an
    empty batch.
    """
    results = []
    for start in range(0, len(children), NOTION_CHILDREN_LIMIT):
        time.sleep(0.3)
        response = client.blocks.children.append(
            block_id=id,
            children=children[start:start + NOTION_CHILDREN_LIMIT])
        results.extend(response.get("results"))
    return results if len(results) == len(children) else None


def add_grandchild(grandchild, results):
    """Attach each quote block to its parent callout.

    ``grandchild`` maps an index into ``results`` (the created child blocks)
    to the block that should be nested under that child.
    """
    for key, value in grandchild.items():
        time.sleep(0.3)
        block_id = results[key].get("id")
        client.blocks.children.append(block_id=block_id, children=[value])


def get_notebooklist():
    """Fetch the books that have notes, sorted by ``sort``; ``None`` on failure."""
    r = session.get(WEREAD_NOTEBOOKS_URL)
    if r.ok:
        data = r.json()
        books = data.get("books") or []
        books.sort(key=lambda x: x["sort"])
        return books
    print(r.text)
    return None


def get_sort():
    """Return the largest ``Sort`` value stored in the database, or 0 if none."""
    query_filter = {
        "property": "Sort",
        "number": {
            "is_not_empty": True
        }
    }
    sorts = [
        {
            "property": "Sort",
            "direction": "descending",
        }
    ]
    response = client.databases.query(
        database_id=database_id, filter=query_filter, sorts=sorts, page_size=1)
    if len(response.get("results")) == 1:
        return response.get("results")[0].get("properties").get("Sort").get("number")
    return 0


def _chunk_text(text, size=NOTION_TEXT_LIMIT):
    """Split ``text`` into pieces of at most ``size`` characters.

    Always returns at least one piece (``""`` for empty/None text) so every
    highlight still produces a callout that a quote can be attached to.
    """
    text = text or ""
    return [text[i:i + size] for i in range(0, len(text), size)] or [""]


def get_children(chapter, summary, bookmark_list):
    """Build the Notion blocks for one book.

    Args:
        chapter: mapping ``chapterUid -> chapter info`` or ``None``.
        summary: raw review entries rendered under the "点评" heading.
        bookmark_list: highlights and notes, already sorted.

    Returns:
        ``(children, grandchild)`` where ``children`` is the flat block list
        and ``grandchild`` maps an index in ``children`` to the quote block
        that must be nested under that callout.
    """
    children = []
    grandchild = {}
    if chapter is not None:
        # Table of contents first.
        children.append(get_table_of_contents())
        by_chapter = {}
        for data in bookmark_list:
            by_chapter.setdefault(data.get("chapterUid", 1), []).append(data)
        for key, value in by_chapter.items():
            if key in chapter:
                # Chapter heading.
                children.append(get_heading(
                    chapter.get(key).get("level"), chapter.get(key).get("title")))
            for item in value:
                for piece in _chunk_text(item.get("markText")):
                    children.append(get_callout(
                        piece, item.get("style"), item.get("colorStyle"),
                        item.get("reviewId")))
                abstract = item.get("abstract")
                if abstract is not None and abstract != "":
                    # The quoted source text hangs off the last callout chunk.
                    grandchild[len(children) - 1] = get_quote(abstract)
    else:
        # No chapter information available: emit a flat list of callouts.
        for data in bookmark_list:
            for piece in _chunk_text(data.get("markText")):
                children.append(get_callout(
                    piece, data.get("style"), data.get("colorStyle"),
                    data.get("reviewId")))
    if summary is not None and len(summary) > 0:
        children.append(get_heading(1, "点评"))
        for entry in summary:
            review = entry.get("review")
            for piece in _chunk_text(review.get("content")):
                children.append(get_callout(
                    piece, entry.get("style"), entry.get("colorStyle"),
                    review.get("reviewId")))
    return children, grandchild


def transform_id(book_id):
    """Encode a book id as hex for the web-reader URL.

    Returns ``('3', [hex of each 9-digit group])`` for all-digit ids and
    ``('4', [hex of every character code concatenated])`` otherwise.
    """
    id_length = len(book_id)

    if re.match(r"^\d*$", book_id):
        ary = [format(int(book_id[i:min(i + 9, id_length)]), 'x')
               for i in range(0, id_length, 9)]
        return '3', ary

    result = ''.join(format(ord(ch), 'x') for ch in book_id)
    return '4', [result]


def calculate_book_str_id(book_id):
    """Compute the string id used in ``weread.qq.com/web/reader/<id>`` URLs."""
    digest = hashlib.md5(book_id.encode('utf-8')).hexdigest()
    code, transformed_ids = transform_id(book_id)
    result = digest[0:3] + code + '2' + digest[-2:]

    # Each part is prefixed by its length as two hex digits; parts are
    # separated by 'g'.
    result += 'g'.join(
        format(len(part), '02x') + part for part in transformed_ids)

    if len(result) < 20:
        result += digest[0:20 - len(result)]

    result += hashlib.md5(result.encode('utf-8')).hexdigest()[0:3]
    return result


def main():
    """Parse the CLI arguments and sync every book newer than the last run."""
    global session, client, database_id

    import requests
    from notion_client import Client

    parser = argparse.ArgumentParser()
    parser.add_argument("weread_cookie")
    parser.add_argument("notion_token")
    parser.add_argument("database_id")
    options = parser.parse_args()
    database_id = options.database_id
    session = requests.Session()
    session.cookies = parse_cookie_string(options.weread_cookie)
    client = Client(
        auth=options.notion_token,
        log_level=logging.ERROR
    )
    session.get(WEREAD_URL)
    latest_sort = get_sort()
    books = get_notebooklist()
    if books is None:
        return
    for index, entry in enumerate(books, start=1):
        sort = entry["sort"]
        if sort <= latest_sort:
            continue
        book = entry.get("book")
        title = book.get("title")
        cover = book.get("cover")
        if book.get("author") == "公众号" and book.get("cover").endswith("/0"):
            cover += ".jpg"
        bookId = book.get("bookId")
        author = book.get("author")
        categories = book.get("categories")
        if categories is not None:
            categories = [x["title"] for x in categories]
        print(f"正在同步 {title} ,一共{len(books)}本,当前是第{index}本。")
        check(bookId)
        isbn, rating = get_bookinfo(bookId)
        page_id = insert_to_notion(
            title, bookId, cover, sort, author, isbn, rating, categories)
        chapter = get_chapter_info(bookId)
        # A failed highlight request must not abort the whole sync.
        bookmark_list = get_bookmark_list(bookId) or []
        summary, reviews = get_review_list(bookId)
        bookmark_list.extend(reviews)
        bookmark_list.sort(key=_bookmark_sort_key)
        children, grandchild = get_children(chapter, summary, bookmark_list)
        results = add_children(page_id, children)
        if grandchild and results is not None:
            add_grandchild(grandchild, results)


if __name__ == "__main__":
    main()