├── requirements.txt ├── README.md └── douban2notion.py /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | requests 3 | gooey -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 利用 python 通过 Notion API 将豆瓣图书及影视信息导入 Notion,包括:书籍、电视剧及电影。 2 | 3 | ## Dependencies 4 | 5 | - `beautifulsoup4` 6 | - `requests` 7 | - `gooey` 8 | - `pyinstaller`(Optional) 9 | 10 | ## Quickstart 11 | 12 | ### 创建 Notion API 13 | 14 | 1. 点击以下链接创建 Notion API,注意确保勾选了可编辑权限 15 | 16 | [Notion - The all-in-one workspace for your notes, tasks, wikis, and databases.](https://www.notion.so/my-integrations) 17 | 18 | 2. 获取 `Token` 值,并复制记录,如: 19 | 20 | ![notion_token](https://raw.githubusercontent.com/jarrett-au/img_bed/master/2022/02/10_Untitled.png) 21 | 22 | 23 | ### Duplicate 模板 24 | 25 | 28 | 29 | 1. 按需Duplicate以下模板: 30 | - [📚书库模板](https://www.notion.so/6a4c56ded2cc4d1a9793a0434188994d?pvs=21) 31 | - [📺追剧模板](https://www.notion.so/eb3ba38856844aa6a58954896d298c9f?pvs=21) 32 | - [🎬电影模板](https://www.notion.so/3fb8fbaea4574c73959f55f6745b9565?pvs=21) 33 | 2. 在模板页面点击 **Share** 按钮将刚创建的机器人 `Invite` 进去: 34 | 35 | ![invite_bot](https://raw.githubusercontent.com/jarrett-au/img_bed/master/2022/02/10_Untitled%201.png) 36 | 37 | 3. 获取模板的 `database_id` ,并复制记录,如: 38 | 39 | ![database_id](https://raw.githubusercontent.com/jarrett-au/img_bed/master/2022/02/10_Untitled%202.png) 40 | 41 | 42 | ### 运行脚本 43 | 44 | 1. 安装所需依赖 45 | ```bash 46 | pip install -r requirements.txt 47 | ``` 48 | 2. 运行主程序,如果是第一次运行会提示输入 `NOTION_API_TOKEN`、`BOOK_DATABASE_ID`、`MOVIE_DATABASE_ID` 和 `TV_DATABASE_ID`,生成的配置文件 `config.json` 默认保存到相同目录下 49 | 3. 
"""Import Douban book, movie and TV entries into Notion databases.

Usage (final steps of the README quickstart): choose the import mode, then
enter one or more Douban ``subject_id`` values; separate multiple IDs with an
ASCII comma (``,``)::

    python douban2notion.py

Optionally download the prebuilt executable from
https://github.com/jarrett-au/douban2notion/releases/tag/v1.0.0 or package
the script yourself::

    pip install pyinstaller
    pyinstaller -Fw douban2notion.py
"""
import os
import json
import requests
from bs4 import BeautifulSoup, FeatureNotFound
from gooey import Gooey, GooeyParser

# Seconds to wait for Douban / Notion before giving up; without a timeout a
# stalled connection would block the GUI forever.
REQUEST_TIMEOUT = 30


def _parse_score(text):
    """Convert the rating text of a Douban page to a number.

    Returns 0 when the text is empty or whitespace only (subject has no
    rating yet); raises ValueError for any other non-numeric text.
    """
    text = text.strip()
    return float(text) if text else 0


class Parser:
    """Scrape one Douban subject page and build a Notion "create page" body.

    Args:
        url: URL of the Douban subject page.
        notion_database_id: ID of the Notion database the page is added to.
    """

    def __init__(self, url, notion_database_id):
        self.url = url
        self.notion_database_id = notion_database_id

    def get_html(self):
        """Download the subject page and return its HTML as text."""
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
        }
        response = requests.get(self.url, headers=headers, timeout=REQUEST_TIMEOUT)
        return response.text

    def _get_soup(self):
        """Parse the page with lxml, falling back to the stdlib parser.

        lxml is not listed in requirements.txt, so a fresh install may not
        have it; BeautifulSoup raises FeatureNotFound in that case.
        """
        html = self.get_html()
        try:
            return BeautifulSoup(html, "lxml")
        except FeatureNotFound:
            return BeautifulSoup(html, "html.parser")

    def book_parser(self):
        """Build the Notion request body for a book subject page.

        Returns:
            dict: JSON-serialisable body for ``POST /v1/pages``.
        """
        soup = self._get_soup()

        # Title, cover image and rating.
        mainpic = soup.find("div", id="mainpic")
        book_name = soup.find("meta", property="og:title")["content"]
        book_img = mainpic.img["src"]
        book_score = _parse_score(
            soup.find("div", id="interest_sectl").find("strong").text
        )

        # Author: first link in the info box; keep only the last line when
        # the link text is spread over several lines.
        author_name = soup.find("div", id="info").find("a").text
        if "\n" in author_name:
            author_name = author_name.split("\n")[-1].strip()

        # Tags: the section is absent on some pages.
        tags_section = soup.find("div", id="db-tags-section")
        book_tags = [a.text for a in tags_section.find_all("a")] if tags_section else []

        # Keep at most the first three tags, dropping any that merely repeat
        # part of the title or the author name.
        categories = [
            {"name": tag}
            for tag in book_tags[:3]
            if tag not in book_name and tag not in author_name
        ]

        return {
            "parent": {"type": "database_id", "database_id": self.notion_database_id},
            "properties": {
                "书名": {"title": [{"type": "text", "text": {"content": book_name}}]},
                "书籍分类": {"multi_select": categories},
                "豆瓣评分": {"number": book_score},
                "豆瓣链接": {"url": self.url},
                "封面": {
                    "files": [
                        {
                            "type": "external",
                            "name": book_img[-13:],
                            "external": {"url": book_img},
                        }
                    ]
                },
                "状态": {"select": {"name": "想读"}},
                "作者": {
                    "rich_text": [{"type": "text", "text": {"content": author_name}}]
                },
            },
        }

    def movie_parser(self):
        """Build the Notion request body for a movie or TV subject page.

        Returns:
            dict: JSON-serialisable body for ``POST /v1/pages``.

        Raises:
            IndexError: if the info box has neither the expected labels nor
                enough rows for the positional fallback.
        """
        soup = self._get_soup()

        # Title, cover image and rating.
        movie_img = soup.find("div", id="mainpic").img["src"]
        movie_name = soup.find("meta", property="og:title")["content"]
        movie_score = _parse_score(
            soup.find("div", id="interest_sectl").find("strong").text
        )

        # The info box is a list of "label: value" lines.
        info_lines = soup.find("div", id="info").get_text().strip().split("\n")
        info = [line.split(": ") for line in info_lines]
        fields = {row[0].strip(): row[1] for row in info if len(row) > 1}

        # Look rows up by label so pages missing e.g. the writer or cast row
        # do not shift the result; fall back to the original fixed positions
        # (row 4, or row 5 when row 4 is the official-site row).
        movie_country = fields.get("制片国家/地区")
        if movie_country is None:
            movie_country = info[5][1] if info[4][0] == "官方网站" else info[4][1]
        # Several countries are separated by " / "; keep only the first.
        movie_country = movie_country.split(" / ")[0].strip()

        movie_tag = fields.get("类型")
        if movie_tag is None:
            movie_tag = info[3][1]
        # Genres are separated by " / "; a single genre yields one element.
        genres = [{"name": tag.strip()} for tag in movie_tag.split(" / ")]

        return {
            "parent": {"type": "database_id", "database_id": self.notion_database_id},
            "properties": {
                "影片名": {"title": [{"type": "text", "text": {"content": movie_name}}]},
                "影片类型": {"multi_select": genres},
                "豆瓣评分": {"number": movie_score},
                "影片链接": {"url": self.url},
                "封面": {
                    "files": [
                        {
                            "type": "external",
                            "name": movie_img[-13:],
                            "external": {"url": movie_img},
                        }
                    ]
                },
                "状态": {"select": {"name": "想看"}},
                "国家": {"select": {"name": movie_country, "color": "purple"}},
            },
        }


def update_notion_database(url, mode, NOTION_API_TOKEN, BOOK_DATABASE_ID, MOVIE_DATABASE_ID, TV_DATABASE_ID):
    """Scrape one Douban subject and create the matching Notion page.

    Args:
        url: Douban subject URL.
        mode: "book" or "movie"; any other value is handled as TV.
        NOTION_API_TOKEN: Notion integration token.
        BOOK_DATABASE_ID: target database for books.
        MOVIE_DATABASE_ID: target database for movies.
        TV_DATABASE_ID: target database for TV series.

    Prints a one-line summary on success, or Notion's response text on
    failure.
    """
    if mode == "book":
        body = Parser(url, BOOK_DATABASE_ID).book_parser()
    elif mode == "movie":
        body = Parser(url, MOVIE_DATABASE_ID).movie_parser()
    else:
        # TV pages share the movie page layout.
        body = Parser(url, TV_DATABASE_ID).movie_parser()

    response = requests.post(
        "https://api.notion.com/v1/pages",
        json=body,
        headers={
            "Authorization": "Bearer " + NOTION_API_TOKEN,
            "Notion-Version": "2021-08-16",
        },
        timeout=REQUEST_TIMEOUT,
    )

    # Report the outcome of the POST.
    if response.status_code == 200:
        # Both parsers emit title, multi-select and score as the first three
        # properties, so positional access works for every mode.
        properties = list(body["properties"].values())
        name = properties[0]["title"][0]["text"]["content"]
        tags = ", ".join(tag["name"] for tag in properties[1]["multi_select"])
        score = properties[2]["number"]
        print("导入信息成功,影片信息为:")
        print(name, tags, score, url, sep=" | ")
        print(
            "-----------------------------------------------------------------------------------------------------------------"
        )
    else:
        print("导入失败,请检查Body字段与Notion数据库字段:")
        print(response.text)


@Gooey(language="chinese", program_name="Douban2Notion")
def main():
    """Collect settings through the Gooey form and import every subject ID.

    Settings are pre-filled from ``config.json`` in the working directory
    when it exists, and written back there after the form is submitted.
    """
    # Load saved settings if a config file exists.
    config_file = "config.json"
    if os.path.exists(config_file):
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)
    else:
        config = {}

    # Form / command-line arguments.
    arg_parser = GooeyParser(description="Douban to Notion App!")
    arg_parser.add_argument(
        "Mode", default="movie", choices=["book", "movie", "tv"], help="选择导入类型"
    )
    arg_parser.add_argument("Subject_ID", help="输入subject_id,若有多个请以英文逗号分隔")
    arg_parser.add_argument(
        "--NOTION_API_TOKEN",
        default=config.get("NOTION_API_TOKEN", ""),
        help="输入NOTION_API_TOKEN",
        widget="PasswordField",
    )
    arg_parser.add_argument(
        "--BOOK_DATABASE_ID",
        default=config.get("BOOK_DATABASE_ID", ""),
        help="输入BOOK_DATABASE_ID",
    )
    arg_parser.add_argument(
        "--MOVIE_DATABASE_ID",
        default=config.get("MOVIE_DATABASE_ID", ""),
        help="输入MOVIE_DATABASE_ID",
    )
    arg_parser.add_argument(
        "--TV_DATABASE_ID",
        default=config.get("TV_DATABASE_ID", ""),
        help="输入TV_DATABASE_ID",
    )
    args = arg_parser.parse_args()

    # Resolve the effective settings (a blank field falls back to the saved value).
    NOTION_API_TOKEN = args.NOTION_API_TOKEN or config.get("NOTION_API_TOKEN", "")
    BOOK_DATABASE_ID = args.BOOK_DATABASE_ID or config.get("BOOK_DATABASE_ID", "")
    MOVIE_DATABASE_ID = args.MOVIE_DATABASE_ID or config.get("MOVIE_DATABASE_ID", "")
    TV_DATABASE_ID = args.TV_DATABASE_ID or config.get("TV_DATABASE_ID", "")

    # Persist the settings for the next run.
    # NOTE: the token is stored in plain text in config.json.
    config["NOTION_API_TOKEN"] = NOTION_API_TOKEN
    config["BOOK_DATABASE_ID"] = BOOK_DATABASE_ID
    config["MOVIE_DATABASE_ID"] = MOVIE_DATABASE_ID
    config["TV_DATABASE_ID"] = TV_DATABASE_ID
    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(config, f)

    mode = args.Mode
    # TV subjects live under the "movie" URL segment on Douban.
    opt = "book" if mode == "book" else "movie"

    # str.split returns a one-element list when there is no comma, so a single
    # ID and a comma-separated list go through the same loop.
    for subject_id in args.Subject_ID.split(","):
        subject_id = subject_id.strip()
        if not subject_id:
            # Ignore empty items such as the one left by a trailing comma.
            continue
        url = f"https://www.douban.com/{opt}/subject/{subject_id}/"
        update_notion_database(url, mode, NOTION_API_TOKEN, BOOK_DATABASE_ID, MOVIE_DATABASE_ID, TV_DATABASE_ID)


if __name__ == "__main__":
    main()