├── .gitignore
├── ExportMD.py
├── README.MD
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | yuque
2 | .userinfo
3 |
--------------------------------------------------------------------------------
/ExportMD.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | # -----------------------------------------
3 | # createTime : 2021-08-17
4 | # author : Truda
5 | # email : truda8@pm.me
6 | # description: 自动导出语雀知识库为Markdown格式
7 | # -----------------------------------------
8 |
9 | from prettytable import PrettyTable
10 | import re
11 | import os
12 | import time
13 | import aiohttp
14 | import asyncio
15 | from urllib import parse
16 | from PyInquirer import prompt, Separator
17 | from examples import custom_style_2
18 | from colr import color
19 | from cfonts import render, say
20 |
21 |
class ExportMD:
    """Export Yuque knowledge bases (repos) to local Markdown files.

    The exporter lists the repos of one namespace through the Yuque v2 API,
    lets the user pick some of them interactively, and writes every document
    to ``<export_dir>/<repo>/<title>.md``.  Images hosted on the Yuque CDN are
    downloaded to ``<export_dir>/<repo>/assets/`` and the Markdown is rewritten
    to point at the local copies.
    """

    # Characters that cannot be used in file names; each is percent-encoded.
    _UNSAFE_PATH_CHARS = r'/\<>?:"|*'

    # NOTE(review): the tag patterns below were reconstructed -- the extracted
    # source had the HTML-like text stripped (an empty pattern and literal
    # line breaks where a tag used to be).  Confirm against the upstream file.
    # Empty anchor tags that Yuque inserts in exported Markdown.
    _ANCHOR_TAG = re.compile(r'<a name="[^"]*"></a>')
    # A <br /> glued in front of an image keeps that image from being parsed.
    _BR_BEFORE_IMAGE = re.compile(r"<br\s*/?>(?=!\[)")
    # A <br /> glued right after a closing parenthesis (end of an image/link).
    _BR_AFTER_PAREN = re.compile(r"\)<br\s*/?>")

    # Markdown image whose source lives on the Yuque CDN.  Every part is
    # bounded by the closing parenthesis so that several images on one line
    # are matched one by one instead of being swallowed by a single match.
    _IMAGE_PATTERN = re.compile(
        r"!\[(?P<img_name>[^\]]*)\]"
        r"\((?P<img_src>https://cdn\.nlark\.com/yuque[^)\s#?]*"
        r"/(?P<slug>\d+)/(?P<filename>[^/)\s#?]+\.[a-zA-Z]+))[^)]*\)"
    )
    # Replacement pointing at the local copy inside the repo's assets folder.
    _IMAGE_REPL = r"![\g<img_name>](./assets/\g<filename>)"

    def __init__(self):
        # Table of (repo id, repo name) rows filled by getRepo().
        self.repo_table = PrettyTable(["知识库ID", "名称"])
        self.namespace, self.Token = self.get_UserInfo()
        self.headers = {
            "Content-Type": "application/json",
            "User-Agent": "ExportMD",
            "X-Auth-Token": self.Token
        }
        # Maps repo name -> repo id (as a string).
        self.repo = {}
        # Root directory that receives the exported files.
        self.export_dir = './yuque'

    def print_logo(self):
        """Print the ExportMD banner."""
        output = render('ExportMD', colors=['red', 'yellow'], align='center')
        print(output)

    @classmethod
    def _safe_name(cls, name):
        """Return *name* with characters unsafe for file names percent-encoded."""
        for char in cls._UNSAFE_PATH_CHARS:
            name = name.replace(char, parse.quote_plus(char))
        return name

    @classmethod
    def _strip_image_breaks(cls, body):
        """Turn the <br /> tags surrounding images into real line breaks."""
        body = cls._BR_BEFORE_IMAGE.sub("\n", body)
        return cls._BR_AFTER_PAREN.sub(")\n", body)

    @staticmethod
    def _require_data(result, api):
        """Return ``result['data']`` or raise a readable error when it is missing.

        :raises RuntimeError: the response carries no ``data`` member
        """
        data = result.get("data") if isinstance(result, dict) else None
        if data is None:
            raise RuntimeError(
                "Yuque API request %s returned no data: %r" % (api, result)
            )
        return data

    # Yuque user info
    def get_UserInfo(self):
        """Load ``[namespace, token]`` from ``.userinfo`` or ask the user for it.

        The file stores both values joined by ``&``.  When it does not exist
        the values are read from stdin and the file is created.

        :return: two-element list ``[namespace, token]``
        :raises ValueError: the stored file does not contain both values
        """
        f_name = ".userinfo"
        if os.path.isfile(f_name):
            with open(f_name, encoding="utf-8") as f:
                # Strip so a trailing newline does not end up in the token
                # header; split once so a '&' inside the token is preserved.
                userinfo = [part.strip() for part in f.read().strip().split("&", 1)]
            if len(userinfo) != 2 or not all(userinfo):
                raise ValueError(
                    "%s must contain '<namespace>&<token>'" % f_name
                )
        else:
            namespace = input("请输入语雀namespace:").strip()
            Token = input("请输入语雀Token:").strip()
            userinfo = [namespace, Token]
            with open(f_name, "w", encoding="utf-8") as f:
                f.write(namespace + "&" + Token)
        return userinfo

    # Send a request
    async def req(self, session, api):
        """GET ``api`` (a path below ``/api/v2``) and return the decoded JSON.

        :param session: aiohttp client session used for the request
        :param api: API path starting with ``/``
        """
        url = "https://www.yuque.com/api/v2" + api
        async with session.get(url, headers=self.headers) as resp:
            result = await resp.json()
            return result

    # Fetch all repos
    async def getRepo(self):
        """Fill ``self.repo`` and ``self.repo_table`` with the namespace's repos."""
        api = "/users/%s/repos" % self.namespace
        async with aiohttp.ClientSession() as session:
            result = await self.req(session, api)
            for repo in self._require_data(result, api):
                repo_id = str(repo['id'])
                repo_name = repo['name']
                self.repo[repo_name] = repo_id
                self.repo_table.add_row([repo_id, repo_name])

    # Fetch the document list of one repo
    async def get_docs(self, repo_id):
        """Return a ``{slug: title}`` dict for the documents of ``repo_id``."""
        api = "/repos/%s/docs" % repo_id
        async with aiohttp.ClientSession() as session:
            result = await self.req(session, api)
            return {
                doc['slug']: doc['title']
                for doc in self._require_data(result, api)
            }

    # Fetch the Markdown source of a document
    async def get_body(self, repo_id, slug):
        """Return the cleaned Markdown body of document ``slug``.

        Anchor tags, the invisible characters ``\\x00`` / ``\\x05`` and the
        ``<br />`` tags around images are removed.  A document without a
        body yields an empty string.
        """
        api = "/repos/%s/docs/%s" % (repo_id, slug)
        async with aiohttp.ClientSession() as session:
            result = await self.req(session, api)
            body = self._require_data(result, api).get('body') or ""
            body = self._ANCHOR_TAG.sub("", body)
            body = body.replace("\x00", "").replace("\x05", "")
            return self._strip_image_breaks(body)

    # Choose repos
    def selectRepo(self):
        """Show a checkbox prompt and return the list of selected repo names."""
        choices = [{"name": repo_name} for repo_name in self.repo]
        choices.insert(0, Separator('=== 知识库列表 ==='))
        questions = [
            {
                'type': 'checkbox',
                'qmark': '>>>',
                'message': '选择知识库',
                'name': 'repo',
                'choices': choices
            }
        ]
        repo_name_list = prompt(questions, style=custom_style_2)
        return repo_name_list["repo"]

    # Create a directory
    def mkDir(self, dir):
        """Create ``dir`` (and missing parents); do nothing if it exists."""
        os.makedirs(dir, exist_ok=True)

    # Fetch a document and save it
    async def download_md(self, repo_id, slug, repo_name, title):
        """Download one document together with its images and save it.

        :param repo_id: repo id
        :param slug: document slug
        :param repo_name: repo name
        :param title: document title
        :return: None
        """
        body = await self.get_body(repo_id, slug)
        new_body, image_list = await self.to_local_image_src(body)

        if image_list:
            # Images go to <export_dir>/<repo>/assets/<filename>; the repo
            # folder name is encoded exactly like save() encodes it.
            save_dir = os.path.join(
                self.export_dir, self._safe_name(repo_name), "assets"
            )
            self.mkDir(save_dir)
            async with aiohttp.ClientSession() as session:
                await asyncio.gather(
                    *(self.download_image(session, image_info, save_dir)
                      for image_info in image_list)
                )

        self.save(repo_name, title, new_body)

        print("📑 %s 导出成功!" % color(title, fore='green', style='bright'))

    # Replace remote image URLs in the Markdown with local paths
    async def to_local_image_src(self, body):
        """Rewrite Yuque CDN images in ``body`` to ``./assets/<filename>``.

        :param body: Markdown text
        :return: ``(new_body, images)`` where ``images`` is a list of dicts
                 with the keys ``img_name``, ``img_src``, ``slug`` and
                 ``filename``, one per image found
        """
        body = self._strip_image_breaks(body)
        images = [m.groupdict() for m in self._IMAGE_PATTERN.finditer(body)]
        new_body = self._IMAGE_PATTERN.sub(self._IMAGE_REPL, body)
        return new_body, images

    # Download an image
    async def download_image(self, session, image_info: dict, save_dir: str):
        """Download ``image_info['img_src']`` to ``save_dir``.

        :param session: aiohttp client session used for the request
        :param image_info: dict with the keys ``img_src`` and ``filename``
        :param save_dir: existing directory that receives the file
        """
        img_src = image_info['img_src']
        filename = image_info["filename"]

        async with session.get(img_src) as resp:
            # Read first so a failed transfer does not leave an empty file.
            content = await resp.read()
        with open(os.path.join(save_dir, filename), 'wb') as f:
            f.write(content)

    # Save a document
    def save(self, repo_name, title, body):
        """Write ``body`` to ``<export_dir>/<repo_name>/<title>.md``.

        Characters that are not allowed in file names are percent-encoded in
        both ``repo_name`` and ``title``; the repo folder is created when
        missing.
        """
        repo_dir = os.path.join(self.export_dir, self._safe_name(repo_name))
        self.mkDir(repo_dir)
        save_path = os.path.join(repo_dir, "%s.md" % self._safe_name(title))
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(body)

    async def run(self):
        """Run the interactive export: list repos, select, download, report."""
        self.print_logo()
        await self.getRepo()
        repo_name_list = self.selectRepo()

        # Root folder that stores the exported documents.
        self.mkDir(self.export_dir)

        # Walk through the selected repos.
        for repo_name in repo_name_list:
            self.mkDir(os.path.join(self.export_dir, self._safe_name(repo_name)))

            repo_id = self.repo[repo_name]
            docs = await self.get_docs(repo_id)

            # Exporting all documents concurrently makes the API fail, so
            # they are fetched one at a time with a 50 ms pause in between.
            # asyncio.sleep keeps the event loop responsive while waiting.
            for slug, title in docs.items():
                await asyncio.sleep(0.05)
                await self.download_md(repo_id, slug, repo_name, title)

        print("\n" + color('🎉 导出完成!', fore='green', style='bright'))
        print("已导出到:" + color(os.path.realpath(self.export_dir), fore='green', style='bright'))
206 |
207 |
if __name__ == '__main__':
    export = ExportMD()
    # asyncio.get_event_loop() is deprecated when no loop is running, so a
    # dedicated loop is created explicitly and always closed afterwards.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(export.run())
    finally:
        loop.close()
212 |
--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
1 | # ExportMD-rectify-pics
2 | ## 1、写在前面
3 |   
4 |
5 | 简介:ExportMD语雀知识库自动导出为 Markdown 格式,支持同时导出多个知识库。
6 | 但ExportMD导出连续三张图片时,会出现错误,只有第一张图片的内容会被识别,本复刻版本增加了正则表达式,修复这一情况
7 |
8 | 本复刻版本无法提交issue,有问题欢迎到[语雀评论区](https://www.yuque.com/duzh929/blog/ocffqg)和我交流!
9 |
10 | 欢迎给我的语雀文档点赞,或者给我复刻的仓库star
11 |
12 | 另外我想说明一下,复刻这个仓库的目的是为了防止重要的笔记丢失,让大家能够有一个本地的markdown笔记备份,但我并不希望语雀因此流失用户,语雀真的是一款非常优秀的笔记软件,我会一直支持下去的!
13 |
14 |
15 | ### 版本v1.0
16 | API导出连着的图片时,会出现很多`<br />`,这时连续图片中只有第一张图片会被识别
17 | ```
18 | 


19 |
20 | ```
21 | 为了解决这一问题,在原来的ExportMD基础上,我增加了正则表达式,现在连续图片的导出已经没有问题
22 | 这里特别感谢ExportMD!
23 | ### 版本v1.1
24 | 实际使用的时候有遇到一些小问题,主要是接口限制等会导致有时候请求失败
25 | 这里使用[@stone0090](https://github.com/stone0090/ExportMD-rectify-pics)的解决方案
26 |
27 | https://github.com/stone0090/ExportMD-rectify-pics/blob/bf2b79ef0afaffcad78253cafa1760f45359623e/ExportMD.py#L198
28 |
29 | 修改为同步导出,且每次导出等待50ms
30 | 本人技术水平有限,感谢[@stone0090](https://github.com/stone0090/ExportMD-rectify-pics)的修复!
31 |
32 |
33 | ## 2、安装
34 | ### 环境要求
35 | - python >= 3.6 && python <= 3.9
36 |
37 | ### 安装 python 依赖
38 | ```bash
39 | pip install -r requirements.txt
40 | ```
41 | ---
42 |
43 | ## 3、使用
44 |
45 | ## 获取 namespace
46 | 知识库 namespace:知识库 URL 中路径部分
47 |
48 | 知识库 [https://www.yuque.com/YourYuqueUserName](https://www.yuque.com/YourYuqueUserName) 对应的 *namespace* 为 *YourYuqueUserName*
49 |
50 | ## 获取 token
51 | 
52 | ## 使用 python3 运行
53 | ```bash
54 | python ExportMD.py
55 | ```
56 | ## 输入namespace和token
57 | `⬆ ⬇`移动,`space`选择,`a`全选,`Enter`确认
58 | 
59 |
60 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | prettytable
2 | aiohttp
3 | PyInquirer
4 | Colr
5 | python-cfonts
--------------------------------------------------------------------------------