-------------------------------------------------------------------------------- /assets/ref_header.tmpl: -------------------------------------------------------------------------------- 1 |
 {}: {}
# -------------------------------------------------------------------------------- /assets/sub.tmpl: --------------------------------------------------------------------------------
# 1 | <{} style="margin-top: 30px; margin-bottom: 15px; padding: 0px; font-weight: bold; color: black; font-size: {}px;">{}{}>
# 2 |
# -------------------------------------------------------------------------------- /sync.py: --------------------------------------------------------------------------------
#!/usr/bin/env python
##public/upload_news.py
# -*- coding: utf-8 -*-
"""
Push articles to a WeChat Official Account.
"""
from calendar import c
from datetime import datetime
from datetime import timedelta
from weakref import ref
from pyquery import PyQuery
from datetime import date, timedelta
import time
import html
import urllib
import markdown
from markdown.extensions import codehilite
import os
import hashlib
import pickle
from pathlib import Path
from werobot import WeRoBot
import requests
import json
import urllib.request
import random
import string

# In-memory cache, keyed by the MD5 hex digest of a file's bytes.  Values are
# either "path:timestamp" strings (see cache_update) or [media_id, media_url]
# lists (see upload_image_from_path).
CACHE = {}

# File the cache is pickled to, relative to the current working directory.
CACHE_STORE = "cache.bin"


def dump_cache():
    """Pickle the current ``CACHE`` dict into ``CACHE_STORE``."""
    # The context manager closes (and therefore flushes) the file, so the
    # pickle is fully on disk when this returns.
    with open(CACHE_STORE, "wb") as fp:
        pickle.dump(CACHE, fp)


def init_cache():
    """Load ``CACHE`` from ``CACHE_STORE``; create the file if it is missing."""
    global CACHE
    if os.path.exists(CACHE_STORE):
        # NOTE(security): pickle.load can execute arbitrary code.  This is
        # only acceptable while cache.bin is a file written by dump_cache().
        with open(CACHE_STORE, "rb") as fp:
            CACHE = pickle.load(fp)
        return
    dump_cache()


class NewClient:
    """Fetch and cache a WeChat access token.

    The app id and secret are read from the ``WECHAT_APP_ID`` and
    ``WECHAT_APP_SECRET`` environment variables.
    """

    def __init__(self):
        self.__accessToken = ''
        # Lifetime reported by the server for the cached token
        # (``expires_in``; presumably seconds, confirm against the API docs).
        self.__leftTime = 0
        # time.monotonic() reading taken when the token was fetched.
        self.__fetchedAt = 0.0

    def __real_get_access_token(self):
        """Request a new token and record its lifetime and fetch time.

        Raises:
            KeyError: if the response JSON has no ``access_token`` or
                ``expires_in`` field (e.g. an error payload).
        """
        postUrl = ("https://api.weixin.qq.com/cgi-bin/token?grant_type="
                   "client_credential&appid=%s&secret=%s" % (os.getenv('WECHAT_APP_ID'), os.getenv('WECHAT_APP_SECRET')))
        with urllib.request.urlopen(postUrl) as resp:
            payload = json.loads(resp.read())
        self.__accessToken = payload['access_token']
        self.__leftTime = payload['expires_in']
        self.__fetchedAt = time.monotonic()

    def get_access_token(self):
        """Return the cached token, refreshing it when under 10 units remain."""
        # Age the stored lifetime by the time elapsed since the fetch;
        # without this the token would never be refreshed after the first
        # request.
        remaining = self.__leftTime - (time.monotonic() - self.__fetchedAt)
        if remaining < 10:
            self.__real_get_access_token()
        return self.__accessToken


def Client():
    """Build a WeRoBot client from the environment credentials.

    Returns:
        A ``(client, token)`` tuple, where ``token`` is whatever
        ``client.grant_token()`` returns.
    """
    robot = WeRoBot()
    robot.config["APP_ID"] = os.getenv('WECHAT_APP_ID')
    robot.config["APP_SECRET"] = os.getenv('WECHAT_APP_SECRET')
    client = robot.client
    token = client.grant_token()
    return client, token


def cache_get(key):
    """Return the cached value for ``key``, or ``None`` when absent."""
    return CACHE.get(key)


def file_digest(file_path):
    """
    Compute the MD5 hex digest of a file's contents.
    """
    md5 = hashlib.md5()
    with open(file_path, 'rb') as f:
        # Read in chunks so large files are not held in memory at once.
        for chunk in iter(lambda: f.read(65536), b''):
            md5.update(chunk)
    return md5.hexdigest()


def cache_update(file_path):
    """Record ``file_path`` as processed (keyed by its digest) and persist."""
    digest = file_digest(file_path)
    CACHE[digest] = "{}:{}".format(file_path, datetime.now())
    dump_cache()


def file_processed(file_path):
    """Return True when the file's digest already has a cache entry."""
    return cache_get(file_digest(file_path)) is not None


def upload_image_from_path(image_path):
    """Upload a local image as a permanent media item, with caching.

    Returns:
        ``(media_id, media_url)``; the cached pair when the file's digest is
        already known, or ``(None, None)`` when the upload fails.
    """
    image_digest = file_digest(image_path)
    cached = cache_get(image_digest)
    if cached is not None:
        return cached[0], cached[1]
    client, _ = Client()
    print("uploading image {}".format(image_path))
    try:
        with open(image_path, "rb") as image_file:
            # Permanent material.
            media_json = client.upload_permanent_media("image", image_file)
        media_id = media_json['media_id']
        media_url = media_json['url']
        CACHE[image_digest] = [media_id, media_url]
        dump_cache()
        print("file: {} => media_id: {}".format(image_path, media_id))
        return media_id, media_url
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller go on.
        print("upload image error: {}".format(e))
        return None, None


def upload_image(img_url):
    """
    Download ``img_url`` into ``/tmp`` and upload the saved file.

    The local name is the last ``/``-separated URL segment; ``.png`` is
    appended when the resulting path contains no ``.``.  The upload goes
    through ``upload_image_from_path``, which calls
    ``upload_permanent_media``.

    Returns:
        The ``(media_id, media_url)`` pair from ``upload_image_from_path``.
    """
    name = img_url.split("/")[-1]
    f_name = "/tmp/{}".format(name)
    if "." not in f_name:
        f_name = f_name + ".png"
    with urllib.request.urlopen(img_url) as resource, open(f_name, 'wb') as f:
        f.write(resource.read())
    return upload_image_from_path(f_name)


def get_images_from_markdown(content):
    """Collect image targets from lines of the form ``![alt](target)``.

    Only lines that, once stripped, start with ``![`` and end with ``)`` are
    considered; the text between the first ``(`` and the following ``)`` is
    returned for each.
    """
    images = []
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        # The '(' check avoids an IndexError on degenerate lines like "![)".
        if line.startswith('![') and line.endswith(')') and '(' in line:
            images.append(line.split('(')[1].split(')')[0].strip())
    return images


def fetch_attr(content, key):
    """
    Extract an attribute value from markdown text.

    Returns the stripped text after the first ``:`` on the first line that
    starts with ``key``, or ``""`` when no line matches.
    """
    for line in content.split('\n'):
        if line.startswith(key):
            # partition keeps everything after the first colon, so values
            # such as URLs or timestamps are not cut at their own colons,
            # and a matching line without a colon yields "".
            return line.partition(':')[2].strip()
    return ""


def render_markdown(content):
    """Render the post body (text after the second ``---`` line) to HTML.

    The raw HTML is also written to ``origi.html`` in the working directory
    before being passed through ``css_beautify``.
    """
    exts = [
        'markdown.extensions.extra',
        'markdown.extensions.tables',
        'markdown.extensions.toc',
        'markdown.extensions.sane_lists',
        codehilite.makeExtension(
            guess_lang=False,
            noclasses=True,
            pygments_style='monokai'
        ),
    ]
    # Split at most twice: only the first two "---" separators delimit the
    # leading metadata block, and any later "---" lines belong to the article.
    parts = content.split("---\n", 2)
    post = parts[2] if len(parts) > 2 else ""
    # Named `rendered` so the imported `html` module is not shadowed.
    rendered = markdown.markdown(post, extensions=exts)
    with open("origi.html", "w") as debug_file:
        debug_file.write(rendered)
    return css_beautify(rendered)


def update_images_urls(content, uploaded_images):
    """Rewrite ``(image)`` link targets in ``content`` to uploaded URLs.

    ``uploaded_images`` maps each original image reference to a sequence
    whose element 1 is the replacement URL (presumably the
    ``(media_id, media_url)`` pair from the upload helpers; confirm against
    the caller).
    """
    for image, meta in uploaded_images.items():
        content = content.replace("({})".format(image), "({})".format(meta[1]))
    return content


def replace_para(content):
    """Replace the opening paragraph tag of each line with the ``para`` template."""
    # NOTE(review): the tag literal in both calls below was lost when this
    # file was extracted (HTML tags were stripped from the text).  "<p>" is
    # assumed from the function's purpose and the "para" template name;
    # confirm against the original sync.py.
    res = []
    for line in content.split("\n"):
        if line.startswith("<p>"):
            line = line.replace("<p>", gen_css("para"))
        res.append(line)
    return "\n".join(res)
183 |
def gen_css(path, *args):
    """Render the template ``./assets/<path>.tmpl`` with ``str.format``.

    Args:
        path: Template name without directory or ``.tmpl`` suffix; it is
            resolved relative to the current working directory.
        *args: Positional values substituted into the template's ``{}``
            placeholders.

    Returns:
        The formatted template text.

    Raises:
        OSError: if the template file cannot be opened.
        IndexError: if the template has more placeholders than ``args``.
    """
    # Close the template file deterministically instead of leaving the
    # handle to the garbage collector.
    with open("./assets/{}.tmpl".format(path), "r") as tmpl_file:
        tmpl = tmpl_file.read()
    return tmpl.format(*args)
187 |
188 | def replace_header(content):
189 | res = []
190 | for line in content.split("\n"):
191 | l = line.strip()
192 | if l.startswith("""".format(line.attr('alt'), line.attr('src'))
234 | figure = gen_css("figure", link, line.attr('alt'))
235 | content = content.replace(link, figure)
236 | return content
237 |
238 | def format_fix(content):
239 | # content = content.replace("
\n
")
241 | # content = content.replace("
", "\n
")
243 | content = content.replace("", "\n")
244 | content = content.replace("background: #272822", gen_css("code"))
245 | content = content.replace("""
", """", """
""")
246 | return content
247 |
248 | def css_beautify(content):
249 | content = replace_para(content)
250 | content = replace_header(content)
251 | content = replace_links(content)
252 | content = format_fix(content)
253 | content = fix_image(content)
254 | content = gen_css("header") + content + "