2 | -------------------------------------------------------------------------------- /assets/link.tmpl: -------------------------------------------------------------------------------- 1 | {}^[{}] -------------------------------------------------------------------------------- /assets/para.tmpl: -------------------------------------------------------------------------------- 1 |

-------------------------------------------------------------------------------- /assets/ref_header.tmpl: -------------------------------------------------------------------------------- 1 |

参考:

2 | -------------------------------------------------------------------------------- /assets/ref_link.tmpl: -------------------------------------------------------------------------------- 1 | [{}]

{}: {}

-------------------------------------------------------------------------------- /assets/sub.tmpl: -------------------------------------------------------------------------------- 1 | <{} style="margin-top: 30px; margin-bottom: 15px; padding: 0px; font-weight: bold; color: black; font-size: {}px;">{} 2 | -------------------------------------------------------------------------------- /sync.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ##public/upload_news.py 3 | # -*- coding: utf-8 -*- 4 | """ 5 | 推送文章到微信公众号 6 | """ 7 | from calendar import c 8 | from datetime import datetime 9 | from datetime import timedelta 10 | from weakref import ref 11 | from pyquery import PyQuery 12 | from datetime import date, timedelta 13 | import time 14 | import html 15 | import urllib 16 | import markdown 17 | from markdown.extensions import codehilite 18 | import os 19 | import hashlib 20 | import pickle 21 | from pathlib import Path 22 | from werobot import WeRoBot 23 | import requests 24 | import json 25 | import urllib.request 26 | import random 27 | import string 28 | 29 | CACHE = {} 30 | 31 | CACHE_STORE = "cache.bin" 32 | 33 | def dump_cache(): 34 | fp = open(CACHE_STORE, "wb") 35 | pickle.dump(CACHE, fp) 36 | 37 | def init_cache(): 38 | global CACHE 39 | if os.path.exists(CACHE_STORE): 40 | fp = open(CACHE_STORE, "rb") 41 | CACHE = pickle.load(fp) 42 | #print(CACHE) 43 | return 44 | dump_cache() 45 | 46 | 47 | class NewClient: 48 | 49 | def __init__(self): 50 | self.__accessToken = '' 51 | self.__leftTime = 0 52 | 53 | def __real_get_access_token(self): 54 | postUrl = ("https://api.weixin.qq.com/cgi-bin/token?grant_type=" 55 | "client_credential&appid=%s&secret=%s" % (os.getenv('WECHAT_APP_ID'), os.getenv('WECHAT_APP_SECRET'))) 56 | urlResp = urllib.request.urlopen(postUrl) 57 | urlResp = json.loads(urlResp.read()) 58 | self.__accessToken = urlResp['access_token'] 59 | self.__leftTime = urlResp['expires_in'] 60 | 61 | def get_access_token(self): 62 | if self.__leftTime < 10: 63 | self.__real_get_access_token() 64 | return self.__accessToken 65 | 66 | def Client(): 67 | robot = WeRoBot() 68 | robot.config["APP_ID"] = os.getenv('WECHAT_APP_ID') 69 | robot.config["APP_SECRET"] = os.getenv('WECHAT_APP_SECRET') 70 | client = robot.client 71 | token = client.grant_token() 72 | return client, token 73 | 74 | def cache_get(key): 75 | if key in CACHE: 76 | return CACHE[key] 77 | return None 78 | 79 | 80 | def file_digest(file_path): 81 | """ 82 | 计算文件的md5值 83 | """ 84 | md5 = hashlib.md5() 85 | with open(file_path, 'rb') as f: 86 | md5.update(f.read()) 87 | return md5.hexdigest() 88 | 89 | def cache_update(file_path): 90 | digest = file_digest(file_path) 91 | CACHE[digest] = "{}:{}".format(file_path, datetime.now()) 92 | dump_cache() 93 | 94 | def file_processed(file_path): 95 | digest = file_digest(file_path) 96 | return cache_get(digest) != None 97 | 98 | def upload_image_from_path(image_path): 99 | image_digest = file_digest(image_path) 100 | res = cache_get(image_digest) 101 | if res != None: 102 | return res[0], res[1] 103 | client, _ = Client() 104 | print("uploading image {}".format(image_path)) 105 | try: 106 | media_json = client.upload_permanent_media("image", open(image_path, "rb")) ##永久素材 107 | media_id = media_json['media_id'] 108 | media_url = media_json['url'] 109 | CACHE[image_digest] = [media_id, media_url] 110 | dump_cache() 111 | print("file: {} => media_id: {}".format(image_path, media_id)) 112 | return media_id, media_url 113 | except Exception as e: 114 | print("upload image error: {}".format(e)) 115 | return None, None 116 | 117 | def upload_image(img_url): 118 | """ 119 | * 上传临时素材 120 | * 1、临时素材media_id是可复用的。 121 | * 2、媒体文件在微信后台保存时间为3天，即3天后media_id失效。 122 | * 3、上传临时素材的格式、大小限制与公众平台官网一致。 123 | """ 124 | resource = urllib.request.urlopen(img_url) 125 | name = img_url.split("/")[-1] 126 | f_name = "/tmp/{}".format(name) 127 | if "." not in f_name: 128 | f_name = f_name + ".png" 129 | with open(f_name, 'wb') as f: 130 | f.write(resource.read()) 131 | return upload_image_from_path(f_name) 132 | 133 | def get_images_from_markdown(content): 134 | lines = content.split('\n') 135 | images = [] 136 | for line in lines: 137 | line = line.strip() 138 | if line.startswith('![') and line.endswith(')'): 139 | image = line.split('(')[1].split(')')[0].strip() 140 | images.append(image) 141 | return images 142 | 143 | def fetch_attr(content, key): 144 | """ 145 | 从markdown文件中提取属性 146 | """ 147 | lines = content.split('\n') 148 | for line in lines: 149 | if line.startswith(key): 150 | return line.split(':')[1].strip() 151 | return "" 152 | 153 | def render_markdown(content): 154 | exts = ['markdown.extensions.extra', 155 | 'markdown.extensions.tables', 156 | 'markdown.extensions.toc', 157 | 'markdown.extensions.sane_lists', 158 | codehilite.makeExtension( 159 | guess_lang=False, 160 | noclasses=True, 161 | pygments_style='monokai' 162 | ),] 163 | post = "".join(content.split("---\n")[2:]) 164 | html = markdown.markdown(post, extensions=exts) 165 | open("origi.html", "w").write(html) 166 | return css_beautify(html) 167 | 168 | def update_images_urls(content, uploaded_images): 169 | for image, meta in uploaded_images.items(): 170 | orig = "({})".format(image) 171 | new = "({})".format(meta[1]) 172 | #print("{} -> {}".format(orig, new)) 173 | content = content.replace(orig, new) 174 | return content 175 | 176 | def replace_para(content): 177 | res = [] 178 | for line in content.split("\n"): 179 | if line.startswith("

"): 180 | line = line.replace("

", gen_css("para")) 181 | res.append(line) 182 | return "\n".join(res) 183 | 184 | def gen_css(path, *args): 185 | tmpl = open("./assets/{}.tmpl".format(path), "r").read() 186 | return tmpl.format(*args) 187 | 188 | def replace_header(content): 189 | res = [] 190 | for line in content.split("\n"): 191 | l = line.strip() 192 | if l.startswith("") > 0: 193 | tag = l.split(' ')[0].replace('<', '') 194 | value = l.split('>')[1].split('<')[0] 195 | digit = tag[1] 196 | font = (18 + (4 - int(tag[1])) * 2) if (digit >= '0' and digit <= '9') else 18 197 | res.append(gen_css("sub", tag, font, value, tag)) 198 | else: 199 | res.append(line) 200 | return "\n".join(res) 201 | 202 | def replace_links(content): 203 | pq = PyQuery(open('origi.html').read()) 204 | links = pq('a') 205 | refs = [] 206 | index = 1 207 | if len(links) == 0: 208 | return content 209 | for l in links.items(): 210 | link = gen_css("link", l.text(), index) 211 | index += 1 212 | refs.append([l.attr('href'), l.text(), link]) 213 | 214 | for r in refs: 215 | orig = "{}".format(html.escape(r[0]), r[1]) 216 | print(orig) 217 | content = content.replace(orig, r[2]) 218 | content = content + "\n" + gen_css("ref_header") 219 | content = content + """

", "
- ") 240 | # content = content.replace("
", "

") 241 | # content = content.replace("

", "
1. ") 242 | # content = content.replace("
", "

") 243 | content = content.replace("", "\n

") 244 | content = content.replace("background: #272822", gen_css("code")) 245 | content = content.replace("""

""", """""")
246 |     return content
247 | 
248 | def css_beautify(content):
249 |     content = replace_para(content)
250 |     content = replace_header(content)
251 |     content = replace_links(content)
252 |     content = format_fix(content)
253 |     content = fix_image(content)
254 |     content = gen_css("header") + content + "