├── .gitignore ├── README.md ├── assets ├── code.tmpl ├── figure.tmpl ├── header.tmpl ├── link.tmpl ├── para.tmpl ├── ref_header.tmpl ├── ref_link.tmpl └── sub.tmpl └── sync.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | blog-source 3 | code2img 4 | cache.txt 5 | *.log 6 | update.sh 7 | cache.bin 8 | ob 9 | ob-pub 10 | node_modules 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # markdown-to-wechat 2 | 3 | 代码中有硬编码部分,请自行更改。 4 | 5 | ## 安装 6 | 7 | ```bash 8 | pip install markdown Pygments werobot pyquery 9 | ``` 10 | 11 | ## 配置白名单和 token 12 | 13 | 公众号后台路径:设置和开发 -> 基本配置 :填入服务器 IP,生成 token。 14 | 15 | 在 sync.py 中通过环境变量获取 app_id 和 secret: 16 | 17 | ```python 18 | robot.config["APP_ID"] = os.getenv('WECHAT_APP_ID') 19 | robot.config["APP_SECRET"] = os.getenv('WECHAT_APP_SECRET') 20 | ``` 21 | 22 | 把 token 配置到服务器环境变量,然后在服务器上运行 `python3 sync.py` 即可。 23 | 24 | -------------------------------------------------------------------------------- /assets/code.tmpl: -------------------------------------------------------------------------------- 1 | background: #272822; border-radius: 3px; word-wrap: break-word; overflow: scroll; padding: 12px 13px; font-size: 13px; -------------------------------------------------------------------------------- /assets/figure.tmpl: -------------------------------------------------------------------------------- 1 |
{} 2 |
{}
3 |
4 | -------------------------------------------------------------------------------- /assets/header.tmpl: -------------------------------------------------------------------------------- 1 |
2 | -------------------------------------------------------------------------------- /assets/link.tmpl: -------------------------------------------------------------------------------- 1 | {}[{}] -------------------------------------------------------------------------------- /assets/para.tmpl: -------------------------------------------------------------------------------- 1 |

-------------------------------------------------------------------------------- /assets/ref_header.tmpl: -------------------------------------------------------------------------------- 1 |

参考:

2 | -------------------------------------------------------------------------------- /assets/ref_link.tmpl: -------------------------------------------------------------------------------- 1 | [{}]

 {}: {}

-------------------------------------------------------------------------------- /assets/sub.tmpl: -------------------------------------------------------------------------------- 1 | <{} style="margin-top: 30px; margin-bottom: 15px; padding: 0px; font-weight: bold; color: black; font-size: {}px;">{} 2 | -------------------------------------------------------------------------------- /sync.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ##public/upload_news.py 3 | # -*- coding: utf-8 -*- 4 | """ 5 | 推送文章到微信公众号 6 | """ 7 | from calendar import c 8 | from datetime import datetime 9 | from datetime import timedelta 10 | from weakref import ref 11 | from pyquery import PyQuery 12 | from datetime import date, timedelta 13 | import time 14 | import html 15 | import urllib 16 | import markdown 17 | from markdown.extensions import codehilite 18 | import os 19 | import hashlib 20 | import pickle 21 | from pathlib import Path 22 | from werobot import WeRoBot 23 | import requests 24 | import json 25 | import urllib.request 26 | import random 27 | import string 28 | 29 | CACHE = {} 30 | 31 | CACHE_STORE = "cache.bin" 32 | 33 | def dump_cache(): 34 | fp = open(CACHE_STORE, "wb") 35 | pickle.dump(CACHE, fp) 36 | 37 | def init_cache(): 38 | global CACHE 39 | if os.path.exists(CACHE_STORE): 40 | fp = open(CACHE_STORE, "rb") 41 | CACHE = pickle.load(fp) 42 | #print(CACHE) 43 | return 44 | dump_cache() 45 | 46 | 47 | class NewClient: 48 | 49 | def __init__(self): 50 | self.__accessToken = '' 51 | self.__leftTime = 0 52 | 53 | def __real_get_access_token(self): 54 | postUrl = ("https://api.weixin.qq.com/cgi-bin/token?grant_type=" 55 | "client_credential&appid=%s&secret=%s" % (os.getenv('WECHAT_APP_ID'), os.getenv('WECHAT_APP_SECRET'))) 56 | urlResp = urllib.request.urlopen(postUrl) 57 | urlResp = json.loads(urlResp.read()) 58 | self.__accessToken = urlResp['access_token'] 59 | self.__leftTime = urlResp['expires_in'] 
60 | 61 | def get_access_token(self): 62 | if self.__leftTime < 10: 63 | self.__real_get_access_token() 64 | return self.__accessToken 65 | 66 | def Client(): 67 | robot = WeRoBot() 68 | robot.config["APP_ID"] = os.getenv('WECHAT_APP_ID') 69 | robot.config["APP_SECRET"] = os.getenv('WECHAT_APP_SECRET') 70 | client = robot.client 71 | token = client.grant_token() 72 | return client, token 73 | 74 | def cache_get(key): 75 | if key in CACHE: 76 | return CACHE[key] 77 | return None 78 | 79 | 80 | def file_digest(file_path): 81 | """ 82 | 计算文件的md5值 83 | """ 84 | md5 = hashlib.md5() 85 | with open(file_path, 'rb') as f: 86 | md5.update(f.read()) 87 | return md5.hexdigest() 88 | 89 | def cache_update(file_path): 90 | digest = file_digest(file_path) 91 | CACHE[digest] = "{}:{}".format(file_path, datetime.now()) 92 | dump_cache() 93 | 94 | def file_processed(file_path): 95 | digest = file_digest(file_path) 96 | return cache_get(digest) != None 97 | 98 | def upload_image_from_path(image_path): 99 | image_digest = file_digest(image_path) 100 | res = cache_get(image_digest) 101 | if res != None: 102 | return res[0], res[1] 103 | client, _ = Client() 104 | print("uploading image {}".format(image_path)) 105 | try: 106 | media_json = client.upload_permanent_media("image", open(image_path, "rb")) ##永久素材 107 | media_id = media_json['media_id'] 108 | media_url = media_json['url'] 109 | CACHE[image_digest] = [media_id, media_url] 110 | dump_cache() 111 | print("file: {} => media_id: {}".format(image_path, media_id)) 112 | return media_id, media_url 113 | except Exception as e: 114 | print("upload image error: {}".format(e)) 115 | return None, None 116 | 117 | def upload_image(img_url): 118 | """ 119 | * 上传临时素材 120 | * 1、临时素材media_id是可复用的。 121 | * 2、媒体文件在微信后台保存时间为3天,即3天后media_id失效。 122 | * 3、上传临时素材的格式、大小限制与公众平台官网一致。 123 | """ 124 | resource = urllib.request.urlopen(img_url) 125 | name = img_url.split("/")[-1] 126 | f_name = "/tmp/{}".format(name) 127 | if "." 
not in f_name: 128 | f_name = f_name + ".png" 129 | with open(f_name, 'wb') as f: 130 | f.write(resource.read()) 131 | return upload_image_from_path(f_name) 132 | 133 | def get_images_from_markdown(content): 134 | lines = content.split('\n') 135 | images = [] 136 | for line in lines: 137 | line = line.strip() 138 | if line.startswith('![') and line.endswith(')'): 139 | image = line.split('(')[1].split(')')[0].strip() 140 | images.append(image) 141 | return images 142 | 143 | def fetch_attr(content, key): 144 | """ 145 | 从markdown文件中提取属性 146 | """ 147 | lines = content.split('\n') 148 | for line in lines: 149 | if line.startswith(key): 150 | return line.split(':')[1].strip() 151 | return "" 152 | 153 | def render_markdown(content): 154 | exts = ['markdown.extensions.extra', 155 | 'markdown.extensions.tables', 156 | 'markdown.extensions.toc', 157 | 'markdown.extensions.sane_lists', 158 | codehilite.makeExtension( 159 | guess_lang=False, 160 | noclasses=True, 161 | pygments_style='monokai' 162 | ),] 163 | post = "".join(content.split("---\n")[2:]) 164 | html = markdown.markdown(post, extensions=exts) 165 | open("origi.html", "w").write(html) 166 | return css_beautify(html) 167 | 168 | def update_images_urls(content, uploaded_images): 169 | for image, meta in uploaded_images.items(): 170 | orig = "({})".format(image) 171 | new = "({})".format(meta[1]) 172 | #print("{} -> {}".format(orig, new)) 173 | content = content.replace(orig, new) 174 | return content 175 | 176 | def replace_para(content): 177 | res = [] 178 | for line in content.split("\n"): 179 | if line.startswith("

"): 180 | line = line.replace("

", gen_css("para")) 181 | res.append(line) 182 | return "\n".join(res) 183 | 184 | def gen_css(path, *args): 185 | tmpl = open("./assets/{}.tmpl".format(path), "r").read() 186 | return tmpl.format(*args) 187 | 188 | def replace_header(content): 189 | res = [] 190 | for line in content.split("\n"): 191 | l = line.strip() 192 | if l.startswith("") > 0: 193 | tag = l.split(' ')[0].replace('<', '') 194 | value = l.split('>')[1].split('<')[0] 195 | digit = tag[1] 196 | font = (18 + (4 - int(tag[1])) * 2) if (digit >= '0' and digit <= '9') else 18 197 | res.append(gen_css("sub", tag, font, value, tag)) 198 | else: 199 | res.append(line) 200 | return "\n".join(res) 201 | 202 | def replace_links(content): 203 | pq = PyQuery(open('origi.html').read()) 204 | links = pq('a') 205 | refs = [] 206 | index = 1 207 | if len(links) == 0: 208 | return content 209 | for l in links.items(): 210 | link = gen_css("link", l.text(), index) 211 | index += 1 212 | refs.append([l.attr('href'), l.text(), link]) 213 | 214 | for r in refs: 215 | orig = "{}".format(html.escape(r[0]), r[1]) 216 | print(orig) 217 | content = content.replace(orig, r[2]) 218 | content = content + "\n" + gen_css("ref_header") 219 | content = content + """

""" 220 | index = 1 221 | for r in refs: 222 | l = r[2] 223 | line = gen_css("ref_link", index, r[1], r[0]) 224 | index += 1 225 | content += line + "\n" 226 | content = content + "
" 227 | return content 228 | 229 | def fix_image(content): 230 | pq = PyQuery(open('origi.html').read()) 231 | imgs = pq('img') 232 | for line in imgs.items(): 233 | link = """{}""".format(line.attr('alt'), line.attr('src')) 234 | figure = gen_css("figure", link, line.attr('alt')) 235 | content = content.replace(link, figure) 236 | return content 237 | 238 | def format_fix(content): 239 | # content = content.replace("") 241 | # content = content.replace("
    \n
  1. ", "
    1. ") 242 | # content = content.replace("
    2. \n
    ", "
") 243 | content = content.replace("", "\n

") 244 | content = content.replace("background: #272822", gen_css("code")) 245 | content = content.replace("""
""", """
""")
246 |     return content
247 | 
def css_beautify(content):
    """Apply the styling passes to rendered HTML and wrap the result.

    Runs replace_para, replace_header, replace_links, format_fix and
    fix_image in that order, then prefixes the "header" template and appends
    a closing tag.

    :param content: HTML string produced by the markdown renderer.
    :return: the styled HTML string.
    """
    for beautify in (replace_para, replace_header, replace_links,
                     format_fix, fix_image):
        content = beautify(content)
    # NOTE(review): in this copy of the file the literal appended here survives
    # only as a pair of quotes split across a line break -- its markup was
    # stripped during extraction. "</section>" is a reconstruction (presumably
    # closing the wrapper opened by assets/header.tmpl); confirm against the
    # repository before merging.
    return gen_css("header") + content + "</section>"


def _first_uploaded_media_id(images, uploaded_images):
    """Return the media_id of the first image in *images* that was uploaded.

    Failed uploads are never stored in *uploaded_images*, so indexing it with
    ``images[0]`` directly raises KeyError when the cover upload failed.
    Returns '' when no image was uploaded at all.
    """
    for image in images:
        meta = uploaded_images.get(image)
        if meta:
            return meta[0]
    return ''


def upload_media_news(post_path):
    """Render one markdown post and add it to the WeChat draft box.

    Reads the post, uploads every image it references (plus a random
    picsum.photos cover when the post has no image or sets
    ``gen_cover: "true"``), rewrites the image URLs, renders the HTML, writes
    it to ./result.html and POSTs the article to the draft/add endpoint. The
    post is recorded in the cache only after the API returned a media_id.

    :param post_path: path of the markdown post.
    :return: the decoded JSON response of the draft/add call.
    :raises KeyError: if the response carries no ``media_id`` (API error).
    """
    # Posts contain Chinese text: read them as UTF-8 instead of relying on
    # the locale default, and close the handle deterministically.
    with open(post_path, 'r', encoding='utf-8') as fp:
        content = fp.read()
    title = fetch_attr(content, 'title').strip('"').strip('\'')
    gen_cover = fetch_attr(content, 'gen_cover').strip('"')
    images = get_images_from_markdown(content)
    print(title)
    if not images or gen_cover == "true":
        seed = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
        print(seed)
        # The generated cover goes first so that it becomes the thumbnail.
        images = ["https://picsum.photos/seed/" + seed + "/400/600"] + images

    uploaded_images = {}
    for image in images:
        if image.startswith("http"):
            media_id, media_url = upload_image(image)
        else:
            media_id, media_url = upload_image_from_path("./blog-source/source" + image)
        if media_id is not None:
            uploaded_images[image] = [media_id, media_url]

    content = update_images_urls(content, uploaded_images)

    thumb_media_id = _first_uploaded_media_id(images, uploaded_images)
    author = 'yukang'
    result = render_markdown(content)
    link = os.path.basename(post_path).replace('.md', '')
    digest = fetch_attr(content, 'subtitle').strip().strip('"').strip('\'')
    content_source_url = 'https://catcoding.me/p/{}'.format(link)

    articles = {
        'articles':
        [
            {
                "title": title,
                "thumb_media_id": thumb_media_id,
                "author": author,
                "digest": digest,
                "show_cover_pic": 1,
                "content": result,
                "content_source_url": content_source_url
            }
            # For a multi-article item, add one dict per article here
            # (at most 8).
        ]
    }

    with open('./result.html', 'w', encoding='utf-8') as fp:
        fp.write(result)

    client = NewClient()
    token = client.get_access_token()
    headers = {'Content-type': 'text/plain; charset=utf-8'}
    datas = json.dumps(articles, ensure_ascii=False).encode('utf-8')

    postUrl = "https://api.weixin.qq.com/cgi-bin/draft/add?access_token=%s" % token
    # Without a timeout a stalled connection would hang the sync forever.
    r = requests.post(postUrl, data=datas, headers=headers, timeout=30)
    resp = json.loads(r.text)
    print(resp)
    if 'media_id' not in resp:
        # Same exception type as the original bare lookup, but with the
        # response attached; the post is not cached, so it is retried later.
        raise KeyError("media_id missing from draft/add response: {}".format(resp))
    cache_update(post_path)
    return resp


def run(string_date):
    """Upload every unprocessed post whose ``date`` attribute contains *string_date*.

    Scans ./blog-source/source/_posts recursively for ``*.md`` files and
    skips posts that file_processed() reports as already handled.

    :param string_date: date text to look for, e.g. "2023-03-13".
    """
    print(string_date)
    for path in Path("./blog-source/source/_posts").glob('**/*.md'):
        path_str = str(path)
        with open(path_str, 'r', encoding='utf-8') as fp:
            content = fp.read()
        post_date = fetch_attr(content, 'date').strip()
        if string_date not in post_date:
            continue
        if file_processed(path_str):
            print("{} has been processed".format(path_str))
            continue
        print(path_str)
        news_json = upload_media_news(path_str)
        print(news_json)
        print('successful')


def daterange(start_date, end_date):
    """Yield start_date, start_date + 1 day, ... for each whole day before end_date.

    Yields nothing when end_date is less than one full day after start_date.
    """
    for offset in range((end_date - start_date).days):
        yield start_date + timedelta(offset)


if __name__ == '__main__':
    print("begin sync to wechat")
    init_cache()
    start_time = time.time()  # start of the run
    now = datetime.now()
    # Window: the last 7 days up to (but excluding) the day after tomorrow.
    for x in daterange(now - timedelta(days=7), now + timedelta(days=2)):
        print("start time: {}".format(x.strftime("%m/%d/%Y, %H:%M:%S")))
        string_date = x.strftime('%Y-%m-%d')
        print(string_date)
        run(string_date)
    end_time = time.time()  # end of the run
    print("程序耗时%f秒." % (end_time - start_time))