├── README.md
├── app.py
├── feedme
│   ├── __init__.py
│   ├── dajia.py
│   ├── jianshu.py
│   ├── one.py
│   ├── util.py
│   └── zhihu.py
├── requirements.txt
└── wsgi.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Feeds by Doocer

> A small feature written while building Doocer Magazine. Magazine has since been shut down, so this was pulled out of the Doocer project, lightly rewritten, and turned into a standalone service.

Except for 知乎专栏 (Zhihu Zhuanlan), every feed shows summaries only.

## 知乎专栏 (Zhihu Zhuanlan)

`https://feeds.doocer.com/zhihu/c/{name}`

## 读读日报 (Zhihu Daily)

`https://feeds.doocer.com/zhihu/n/{id}`

## 腾讯大家 (Tencent Dajia)

1. Author pages: `https://feeds.doocer.com/dajia/u/{id}`
2. Column pages: `https://feeds.doocer.com/dajia/c/{id}`

## 简书 (Jianshu)

1. `https://feeds.doocer.com/jianshu/u/{id}`
2. `https://feeds.doocer.com/jianshu/c/{id}`
3. `https://feeds.doocer.com/jianshu/nb/{id}`

## 一个 (ONE)

1. `https://feeds.doocer.com/one/article`
2. `https://feeds.doocer.com/one/movie`
3. `https://feeds.doocer.com/one/music`

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------

from feedme import Application

app = Application()


if __name__ == '__main__':
    from werkzeug.serving import run_simple
    run_simple('127.0.0.1', 5000, app, use_debugger=True, use_reloader=True)

--------------------------------------------------------------------------------
/feedme/__init__.py:
--------------------------------------------------------------------------------

from werkzeug.exceptions import HTTPException
from werkzeug.routing import Map, Rule
from werkzeug.utils import redirect
from .util import render_feed
from .one import parse_one_feed
from .zhihu import parse_zhihu_zhuanlan, parse_zhihu_news
from .dajia import parse_dajia_author, parse_dajia_channel
from .jianshu import (
    parse_jianshu_user,
    parse_jianshu_column,
    parse_jianshu_notebook,
)


PARSERS = {
    'zhihu_zhuanlan': parse_zhihu_zhuanlan,
    'zhihu_news': parse_zhihu_news,
    'dajia_user': parse_dajia_author,
    'dajia_channel': parse_dajia_channel,
    'jianshu_user': parse_jianshu_user,
    'jianshu_column': parse_jianshu_column,
    'jianshu_notebook': parse_jianshu_notebook,
    'one_feed': parse_one_feed,
}


class Application(object):
    def __init__(self):
        # the <slug> converter captures the column / user / channel id
        self.url_map = Map([
            Rule('/', endpoint='home'),
            Rule('/zhihu/c/<slug>', endpoint='zhihu_zhuanlan'),
            Rule('/zhihu/n/<slug>', endpoint='zhihu_news'),
            Rule('/dajia/u/<slug>', endpoint='dajia_user'),
            Rule('/dajia/c/<slug>', endpoint='dajia_channel'),
            Rule('/jianshu/u/<slug>', endpoint='jianshu_user'),
            Rule('/jianshu/c/<slug>', endpoint='jianshu_column'),
            Rule('/jianshu/nb/<slug>', endpoint='jianshu_notebook'),
            Rule('/one/<slug>', endpoint='one_feed'),
        ])

    def _dispatch_request(self, environ):
        adapter = self.url_map.bind_to_environ(environ)
        try:
            endpoint, values = adapter.match()
            func = PARSERS.get(endpoint)
            if func:
                if 'slug' in values:
                    return render_feed(func(values['slug']))
                return render_feed(func())
            return redirect('https://github.com/lepture/feedme', 301)
        except HTTPException as e:
            return e

    def __call__(self, environ, start_response):
        response = self._dispatch_request(environ)
        return response(environ, start_response)
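
A quick sketch (not part of the repository) of how the `Map` above resolves an incoming path into an endpoint name plus the captured `slug`; the column name `foobar` is a made-up placeholder:

from werkzeug.routing import Map, Rule

url_map = Map([Rule('/zhihu/c/<slug>', endpoint='zhihu_zhuanlan')])
adapter = url_map.bind('feeds.doocer.com')
# match() returns (endpoint, converter values) for a path
print(adapter.match('/zhihu/c/foobar'))
# -> ('zhihu_zhuanlan', {'slug': 'foobar'})

`_dispatch_request` then looks the endpoint up in PARSERS and passes the slug straight to the matching parser.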
--------------------------------------------------------------------------------
/feedme/dajia.py:
--------------------------------------------------------------------------------

import time
import requests
from .util import Entry, Feed

REFERRER = 'http://dajia.qq.com/author_personal.htm'
WZ_URL = 'http://i.match.qq.com/ninjayc/dajiawenzhanglist'
CHANNEL_URL = 'http://i.match.qq.com/ninjayc/dajialanmu'
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36'
)


def parse_dajia_author(author_id):
    headers = {'User-Agent': USER_AGENT, 'Referer': REFERRER}
    t = int(time.time())

    params = {'action': 'wz', 'authorid': author_id, '_': t}
    resp = requests.get(WZ_URL, headers=headers, params=params)
    if resp.status_code != 200:
        return None

    data = resp.json()
    entries = data.get('data')
    if not entries:
        return None

    latest = entries[0]
    author = latest.get('name')
    url = 'http://dajia.qq.com/author_personal.htm#!/' + author_id

    items = list(format_entries(entries))
    return Feed(title=author, url=url, items=items)


def parse_dajia_channel(channel_id):
    headers = {'User-Agent': USER_AGENT, 'Referer': REFERRER}
    t = int(time.time())

    params = {'action': 'wz', 'channelid': channel_id, '_': t}
    resp = requests.get(WZ_URL, headers=headers, params=params)
    if resp.status_code != 200:
        return None

    data = resp.json()
    entries = data.get('data')
    if not entries:
        return None

    params = {'action': 'lanmu', 'channelid': channel_id, '_': t}
    resp = requests.get(CHANNEL_URL, headers=headers, params=params)
    if resp.status_code != 200:
        return None
    data = resp.json()
    title = data['data']['channel']['n_cname']
    url = 'http://dajia.qq.com/tanzi_diceng.htm#!/' + channel_id
    items = list(format_entries(entries))
    return Feed(title=title, url=url, items=items)


def format_entries(entries):
    for item in entries:
        # the API gives 'YYYY-MM-DD HH:MM:SS' in Beijing time;
        # rewrite it as RFC 3339 with a fixed +08:00 offset
        published = item['n_publishtime']
        published = published.replace(' ', 'T') + '+08:00'
        yield Entry(
            title=item['n_title'],
            url=item['n_url'],
            updated=published,
            published=published,
            content=item['n_describe'],
            author=item['name'],
        )
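
For reference, the timestamp rewrite in `format_entries` is a plain string edit; a standalone illustration with a made-up time:

published = '2017-05-01 12:00:00'.replace(' ', 'T') + '+08:00'
assert published == '2017-05-01T12:00:00+08:00'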
--------------------------------------------------------------------------------
/feedme/jianshu.py:
--------------------------------------------------------------------------------

import requests
from bs4 import BeautifulSoup
from .util import Entry, Feed

HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; Feedme)'}


def parse_jianshu_html(url):
    resp = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(resp.text, 'html.parser')
    titles = soup.select('div.title a.name')
    if not titles:
        return None
    title = titles[0].get_text()

    items = []
    for el in soup.select('ul.note-list li'):
        el_title = el.find('a', class_='title')
        if not el_title:
            continue
        # Entry is a namedtuple, so every field needs a value
        item = {'author': '', 'updated': '', 'published': '', 'content': ''}
        item['title'] = el_title.get_text().strip()
        item['url'] = 'http://www.jianshu.com' + el_title.get('href')
        author = el.find('a', class_='blue-link')
        if author:
            item['author'] = author.get_text().strip()
        updated = el.find('span', class_='time')
        if updated:
            item['updated'] = updated.get('data-shared-at')
            item['published'] = item['updated']
        content = el.find('p', class_='abstract')
        if content:
            item['content'] = content.get_text().strip()
        items.append(Entry(**item))
    return Feed(title=title, url=url, items=items)


def parse_jianshu_user(slug):
    url = 'http://www.jianshu.com/u/' + slug
    return parse_jianshu_html(url)


def parse_jianshu_column(slug):
    url = 'http://www.jianshu.com/c/' + slug
    return parse_jianshu_html(url)


def parse_jianshu_notebook(slug):
    url = 'http://www.jianshu.com/nb/' + slug
    return parse_jianshu_html(url)

--------------------------------------------------------------------------------
/feedme/one.py:
--------------------------------------------------------------------------------

import re
import requests
from .util import Entry, Feed

TITLE_SUFFIX = u' - 「ONE · 一个」'
TITLE_MAP = {
    'article': u'阅读',
    'movie': u'影视',
    'music': u'音乐',
}
BASE_URL = 'http://m.wufazhuce.com/'
TOKEN_PATTERN = re.compile(r"One\.token = '(.*?)';")
HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; Feedme)'}


def parse_one_feed(category):
    title_prefix = TITLE_MAP.get(category)
    if not title_prefix:
        return None

    # fetch the category page first to scrape the CSRF-style token
    url = BASE_URL + category
    sess = requests.Session()
    resp = sess.get(url, headers=HEADERS)
    m = TOKEN_PATTERN.findall(resp.text)
    if not m:
        return None

    api_url = 'http://m.wufazhuce.com/{}/ajaxlist/0'.format(category)
    resp = sess.get(api_url, params={'_token': m[0]}, headers=HEADERS)
    result = resp.json()
    items = list(format_entries(result['data']))
    title = title_prefix + TITLE_SUFFIX
    return Feed(title=title, url=url, items=items)


def format_entries(entries):
    for item in entries:
        updated = _format_time(item['last_update_date'])
        published = _format_time(item['post_date'])
        yield Entry(
            title=item['title'],
            url=item['url'],
            updated=updated,
            published=published,
            content=item['forward'],
            author=item['author']['user_name'],
        )


def _format_time(s):
    return s.replace(' ', 'T') + '+08:00'

--------------------------------------------------------------------------------
/feedme/util.py:
--------------------------------------------------------------------------------

from collections import namedtuple
from werkzeug.wrappers import Response
from werkzeug.exceptions import NotFound


Feed = namedtuple('Feed', ['title', 'url', 'items'])
Entry = namedtuple('Entry', [
    'title', 'url', 'author',
    'updated', 'published', 'content'
])

ENTRY_TPL = '''<entry>
<title><![CDATA[%(title)s]]></title>
<link rel="alternate" type="text/html" href="%(url)s" />
<id>%(url)s</id>
<author><name>%(author)s</name></author>
<updated>%(updated)s</updated>
<published>%(published)s</published>
<content type="html"><![CDATA[%(content)s]]></content>
</entry>
'''

FEED_HEAD = '''<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title><![CDATA[%(title)s]]></title>
<link rel="alternate" type="text/html" href="%(url)s" />
<id>%(url)s</id>
'''


def _iter_feed(feed):
    yield FEED_HEAD % feed._asdict()
    if feed.items:
        # the feed-level <updated> mirrors the newest entry
        item = feed.items[0]
        yield '<updated>%s</updated>' % item.updated
    for item in feed.items:
        yield ENTRY_TPL % item._asdict()
    yield '</feed>'


def render_feed(feed):
    if feed is None:
        raise NotFound()
    return Response(_iter_feed(feed), content_type='text/xml; charset=utf-8')
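
A small sketch of `render_feed` in use, with made-up values, to show the Atom document that `_iter_feed` streams out:

from feedme.util import Entry, Feed, render_feed

entry = Entry(
    title='Hello', url='https://example.com/posts/1', author='someone',
    updated='2017-05-01T12:00:00+08:00',
    published='2017-05-01T12:00:00+08:00',
    content='<p>summary</p>',
)
feed = Feed(title='Demo', url='https://example.com', items=[entry])
response = render_feed(feed)
# get_data() drains the streamed generator into one XML string
print(response.get_data(as_text=True))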
--------------------------------------------------------------------------------
/feedme/zhihu.py:
--------------------------------------------------------------------------------

import datetime
import requests
from .util import Entry, Feed

BASE = 'https://zhuanlan.zhihu.com'
HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; Feedme)'}


def parse_zhihu_zhuanlan(slug):
    column_url = BASE + '/api/columns/' + slug

    posts_url = column_url + '/articles'
    resp = requests.get(posts_url, headers=HEADERS)
    if resp.status_code != 200:
        return None

    items = []
    for item in resp.json()['data']:
        title = item.get('title')
        url = item.get('url')
        content = item.get('excerpt')
        if title and url and content:
            author = item.get('author', {})
            items.append(Entry(
                title=title,
                url=url,
                updated=format_time(item.get('updated')),
                published=format_time(item.get('created')),
                content=content,
                author=author.get('name', '')
            ))

    resp = requests.get(column_url, headers=HEADERS)
    if resp.status_code != 200:
        return None
    data = resp.json()
    title = data.get('title')
    url = data.get('url')
    return Feed(title=title, url=url, items=items)


def parse_zhihu_news(channel_id):
    url = 'https://news-at.zhihu.com/api/7/section/' + channel_id
    resp = requests.get(url, headers=HEADERS)
    if resp.status_code != 200:
        return None

    data = resp.json()
    stories = data.get('stories')
    if not stories:
        return None

    items = []
    # some sections repeat one title for every story; fall back to the date
    is_same_title = len({d['title'] for d in stories}) == 1

    for item in stories:
        date = item['date']
        published = '{}-{}-{}T00:00:00Z'.format(
            date[:4], date[4:6], date[6:8]
        )

        if is_same_title:
            title = item['display_date']
        else:
            title = item['title']

        # minimal HTML body: the story title plus any cover images
        content = '<h2>{}</h2>'.format(title)
        images = item.get('images') or []
        for src in images:
            content += '<img src="{}">'.format(src)

        items.append(Entry(
            title=title,
            url='https://daily.zhihu.com/story/{}'.format(item['id']),
            updated=published,
            published=published,
            content=content,
            author=''
        ))

    title = data['name']
    url = 'https://daily.zhihu.com/'
    return Feed(title=title, url=url, items=items)


def format_time(t):
    # the API gives Unix seconds; render in UTC to match the trailing 'Z'
    d = datetime.datetime.utcfromtimestamp(t)
    return d.strftime('%Y-%m-%dT%H:%M:%SZ')

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

requests
werkzeug
beautifulsoup4

--------------------------------------------------------------------------------
/wsgi.py:
--------------------------------------------------------------------------------

from feedme import Application

app = Application()
--------------------------------------------------------------------------------
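
To smoke-test the whole WSGI app without running a server, Werkzeug's test client can drive `app` from wsgi.py directly; a sketch assuming Werkzeug 2.x (the `/one/article` route is real, but it needs network access to the upstream site):

from werkzeug.test import Client
from wsgi import app

client = Client(app)
resp = client.get('/one/article')
# a healthy feed responds 200 with text/xml
print(resp.status_code, resp.headers.get('Content-Type'))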