├── README.md
├── app.py
├── feedme
│   ├── __init__.py
│   ├── dajia.py
│   ├── jianshu.py
│   ├── one.py
│   ├── util.py
│   └── zhihu.py
├── requirements.txt
└── wsgi.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Feeds by Doocer

> A small feature written while building Doocer Magazine. Magazine has since been shut down, so this was pulled out of the Doocer project, lightly rewritten, and turned into a standalone service.

Except for 知乎专栏 (Zhihu Zhuanlan), every feed shows summaries only.

## 知乎专栏 (Zhihu Zhuanlan)

`https://feeds.doocer.com/zhihu/c/{name}`

## 读读日报 (Zhihu Daily)

`https://feeds.doocer.com/zhihu/n/{id}`

## 腾讯大家 (Tencent Dajia)

1. Author pages: `https://feeds.doocer.com/dajia/u/{id}`
2. Column pages: `https://feeds.doocer.com/dajia/c/{id}`

## 简书 (Jianshu)

1. `https://feeds.doocer.com/jianshu/u/{id}`
2. `https://feeds.doocer.com/jianshu/c/{id}`
3. `https://feeds.doocer.com/jianshu/nb/{id}`

## 一个 (ONE)

1. `https://feeds.doocer.com/one/article`
2. `https://feeds.doocer.com/one/movie`
3. `https://feeds.doocer.com/one/music`

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------

from feedme import Application

app = Application()


if __name__ == '__main__':
    from werkzeug.serving import run_simple
    run_simple('127.0.0.1', 5000, app, use_debugger=True, use_reloader=True)

--------------------------------------------------------------------------------
/feedme/__init__.py:
--------------------------------------------------------------------------------

from werkzeug.exceptions import HTTPException
from werkzeug.routing import Map, Rule
from werkzeug.utils import redirect
from .util import render_feed
from .one import parse_one_feed
from .zhihu import parse_zhihu_zhuanlan, parse_zhihu_news
from .dajia import parse_dajia_author, parse_dajia_channel
from .jianshu import (
    parse_jianshu_user,
    parse_jianshu_column,
    parse_jianshu_notebook,
)


PARSERS = {
    'zhihu_zhuanlan': parse_zhihu_zhuanlan,
    'zhihu_news': parse_zhihu_news,
    'dajia_user': parse_dajia_author,
    'dajia_channel': parse_dajia_channel,
    'jianshu_user': parse_jianshu_user,
    'jianshu_column': parse_jianshu_column,
    'jianshu_notebook': parse_jianshu_notebook,
    'one_feed': parse_one_feed,
}


class Application(object):
    def __init__(self):
        # the <slug> converter captures the column / user / channel id
        self.url_map = Map([
            Rule('/', endpoint='home'),
            Rule('/zhihu/c/<slug>', endpoint='zhihu_zhuanlan'),
            Rule('/zhihu/n/<slug>', endpoint='zhihu_news'),
            Rule('/dajia/u/<slug>', endpoint='dajia_user'),
            Rule('/dajia/c/<slug>', endpoint='dajia_channel'),
            Rule('/jianshu/u/<slug>', endpoint='jianshu_user'),
            Rule('/jianshu/c/<slug>', endpoint='jianshu_column'),
            Rule('/jianshu/nb/<slug>', endpoint='jianshu_notebook'),
            Rule('/one/<slug>', endpoint='one_feed'),
        ])

    def _dispatch_request(self, environ):
        adapter = self.url_map.bind_to_environ(environ)
        try:
            endpoint, values = adapter.match()
            func = PARSERS.get(endpoint)
            if func:
                if 'slug' in values:
                    return render_feed(func(values['slug']))
                return render_feed(func())
            return redirect('https://github.com/lepture/feedme', 301)
        except HTTPException as e:
            return e

    def __call__(self, environ, start_response):
        response = self._dispatch_request(environ)
        return response(environ, start_response)
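
A quick sketch (not part of the repository) of how the `Map` above resolves an incoming path into an endpoint name plus the captured `slug`; the column name `foobar` is a made-up placeholder:

from werkzeug.routing import Map, Rule

url_map = Map([Rule('/zhihu/c/<slug>', endpoint='zhihu_zhuanlan')])
adapter = url_map.bind('feeds.doocer.com')
# match() returns (endpoint, converter values) for a path
print(adapter.match('/zhihu/c/foobar'))
# -> ('zhihu_zhuanlan', {'slug': 'foobar'})

`_dispatch_request` then looks the endpoint up in PARSERS and passes the slug straight to the matching parser.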
--------------------------------------------------------------------------------
/feedme/dajia.py:
--------------------------------------------------------------------------------

import time
import requests
from .util import Entry, Feed

REFERRER = 'http://dajia.qq.com/author_personal.htm'
WZ_URL = 'http://i.match.qq.com/ninjayc/dajiawenzhanglist'
CHANNEL_URL = 'http://i.match.qq.com/ninjayc/dajialanmu'
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36'
)


def parse_dajia_author(author_id):
    headers = {'User-Agent': USER_AGENT, 'Referer': REFERRER}
    t = int(time.time())

    params = {'action': 'wz', 'authorid': author_id, '_': t}
    resp = requests.get(WZ_URL, headers=headers, params=params)
    if resp.status_code != 200:
        return None

    data = resp.json()
    entries = data.get('data')
    if not entries:
        return None

    latest = entries[0]
    author = latest.get('name')
    url = 'http://dajia.qq.com/author_personal.htm#!/' + author_id

    items = list(format_entries(entries))
    return Feed(title=author, url=url, items=items)


def parse_dajia_channel(channel_id):
    headers = {'User-Agent': USER_AGENT, 'Referer': REFERRER}
    t = int(time.time())

    params = {'action': 'wz', 'channelid': channel_id, '_': t}
    resp = requests.get(WZ_URL, headers=headers, params=params)
    if resp.status_code != 200:
        return None

    data = resp.json()
    entries = data.get('data')
    if not entries:
        return None

    params = {'action': 'lanmu', 'channelid': channel_id, '_': t}
    resp = requests.get(CHANNEL_URL, headers=headers, params=params)
    if resp.status_code != 200:
        return None
    data = resp.json()
    title = data['data']['channel']['n_cname']
    url = 'http://dajia.qq.com/tanzi_diceng.htm#!/' + channel_id
    items = list(format_entries(entries))
    return Feed(title=title, url=url, items=items)


def format_entries(entries):
    for item in entries:
        # the API gives 'YYYY-MM-DD HH:MM:SS' in Beijing time;
        # rewrite it as RFC 3339 with a fixed +08:00 offset
        published = item['n_publishtime']
        published = published.replace(' ', 'T') + '+08:00'
        yield Entry(
            title=item['n_title'],
            url=item['n_url'],
            updated=published,
            published=published,
            content=item['n_describe'],
            author=item['name'],
        )
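
For reference, the timestamp rewrite in `format_entries` is a plain string edit; a standalone illustration with a made-up time:

published = '2017-05-01 12:00:00'.replace(' ', 'T') + '+08:00'
assert published == '2017-05-01T12:00:00+08:00'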
--------------------------------------------------------------------------------
/feedme/jianshu.py:
--------------------------------------------------------------------------------

import requests
from bs4 import BeautifulSoup
from .util import Entry, Feed

HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; Feedme)'}


def parse_jianshu_html(url):
    resp = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(resp.text, 'html.parser')
    titles = soup.select('div.title a.name')
    if not titles:
        return None
    title = titles[0].get_text()

    items = []
    for el in soup.select('ul.note-list li'):
        el_title = el.find('a', class_='title')
        if not el_title:
            continue
        # Entry is a namedtuple, so every field needs a value
        item = {'author': '', 'updated': '', 'published': '', 'content': ''}
        item['title'] = el_title.get_text().strip()
        item['url'] = 'http://www.jianshu.com' + el_title.get('href')
        author = el.find('a', class_='blue-link')
        if author:
            item['author'] = author.get_text().strip()
        updated = el.find('span', class_='time')
        if updated:
            item['updated'] = updated.get('data-shared-at')
            item['published'] = item['updated']
        content = el.find('p', class_='abstract')
        if content:
            item['content'] = content.get_text().strip()
        items.append(Entry(**item))
    return Feed(title=title, url=url, items=items)


def parse_jianshu_user(slug):
    url = 'http://www.jianshu.com/u/' + slug
    return parse_jianshu_html(url)


def parse_jianshu_column(slug):
    url = 'http://www.jianshu.com/c/' + slug
    return parse_jianshu_html(url)


def parse_jianshu_notebook(slug):
    url = 'http://www.jianshu.com/nb/' + slug
    return parse_jianshu_html(url)

--------------------------------------------------------------------------------
/feedme/one.py:
--------------------------------------------------------------------------------

import re
import requests
from .util import Entry, Feed

TITLE_SUFFIX = u' - 「ONE · 一个」'
TITLE_MAP = {
    'article': u'阅读',
    'movie': u'影视',
    'music': u'音乐',
}
BASE_URL = 'http://m.wufazhuce.com/'
TOKEN_PATTERN = re.compile(r"One\.token = '(.*?)';")
HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; Feedme)'}


def parse_one_feed(category):
    title_prefix = TITLE_MAP.get(category)
    if not title_prefix:
        return None

    # fetch the category page first to scrape the CSRF-style token
    url = BASE_URL + category
    sess = requests.Session()
    resp = sess.get(url, headers=HEADERS)
    m = TOKEN_PATTERN.findall(resp.text)
    if not m:
        return None

    api_url = 'http://m.wufazhuce.com/{}/ajaxlist/0'.format(category)
    resp = sess.get(api_url, params={'_token': m[0]}, headers=HEADERS)
    result = resp.json()
    items = list(format_entries(result['data']))
    title = title_prefix + TITLE_SUFFIX
    return Feed(title=title, url=url, items=items)


def format_entries(entries):
    for item in entries:
        updated = _format_time(item['last_update_date'])
        published = _format_time(item['post_date'])
        yield Entry(
            title=item['title'],
            url=item['url'],
            updated=updated,
            published=published,
            content=item['forward'],
            author=item['author']['user_name'],
        )


def _format_time(s):
    return s.replace(' ', 'T') + '+08:00'

--------------------------------------------------------------------------------
/feedme/util.py:
--------------------------------------------------------------------------------

from collections import namedtuple
from werkzeug.wrappers import Response
from werkzeug.exceptions import NotFound


Feed = namedtuple('Feed', ['title', 'url', 'items'])
Entry = namedtuple('Entry', [
    'title', 'url', 'author',
    'updated', 'published', 'content'
])

ENTRY_TPL = '''<entry>
<title><![CDATA[%(title)s]]></title>
<link rel="alternate" type="text/html" href="%(url)s" />
<id>%(url)s</id>
<author><name>%(author)s</name></author>
<updated>%(updated)s</updated>
<published>%(published)s</published>
<content type="html"><![CDATA[%(content)s]]></content>
</entry>
'''

FEED_HEAD = '''<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title><![CDATA[%(title)s]]></title>
<link rel="alternate" type="text/html" href="%(url)s" />
<id>%(url)s</id>
'''


def _iter_feed(feed):
    yield FEED_HEAD % feed._asdict()
    if feed.items:
        # the feed-level <updated> mirrors the newest entry
        item = feed.items[0]
        yield '<updated>%s</updated>' % item.updated
    for item in feed.items:
        yield ENTRY_TPL % item._asdict()
    yield '</feed>'


def render_feed(feed):
    if feed is None:
        raise NotFound()
    return Response(_iter_feed(feed), content_type='text/xml; charset=utf-8')
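
A small sketch of `render_feed` in use, with made-up values, to show the Atom document that `_iter_feed` streams out:

from feedme.util import Entry, Feed, render_feed

entry = Entry(
    title='Hello', url='https://example.com/posts/1', author='someone',
    updated='2017-05-01T12:00:00+08:00',
    published='2017-05-01T12:00:00+08:00',
    content='<p>summary</p>',
)
feed = Feed(title='Demo', url='https://example.com', items=[entry])
response = render_feed(feed)
# get_data() drains the streamed generator into one XML string
print(response.get_data(as_text=True))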
--------------------------------------------------------------------------------
/feedme/zhihu.py:
--------------------------------------------------------------------------------

import datetime
import requests
from .util import Entry, Feed

BASE = 'https://zhuanlan.zhihu.com'
HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; Feedme)'}


def parse_zhihu_zhuanlan(slug):
    column_url = BASE + '/api/columns/' + slug

    posts_url = column_url + '/articles'
    resp = requests.get(posts_url, headers=HEADERS)
    if resp.status_code != 200:
        return None

    items = []
    for item in resp.json()['data']:
        title = item.get('title')
        url = item.get('url')
        content = item.get('excerpt')
        if title and url and content:
            author = item.get('author', {})
            items.append(Entry(
                title=title,
                url=url,
                updated=format_time(item.get('updated')),
                published=format_time(item.get('created')),
                content=content,
                author=author.get('name', '')
            ))

    resp = requests.get(column_url, headers=HEADERS)
    if resp.status_code != 200:
        return None
    data = resp.json()
    title = data.get('title')
    url = data.get('url')
    return Feed(title=title, url=url, items=items)


def parse_zhihu_news(channel_id):
    url = 'https://news-at.zhihu.com/api/7/section/' + channel_id
    resp = requests.get(url, headers=HEADERS)
    if resp.status_code != 200:
        return None

    data = resp.json()
    stories = data.get('stories')
    if not stories:
        return None

    items = []
    # some sections repeat one title for every story; fall back to the date
    is_same_title = len({d['title'] for d in stories}) == 1

    for item in stories:
        date = item['date']
        published = '{}-{}-{}T00:00:00Z'.format(
            date[:4], date[4:6], date[6:8]
        )

        if is_same_title:
            title = item['display_date']
        else:
            title = item['title']

        # minimal HTML body: the story title plus any cover images
        content = '<h2>{}</h2>'.format(title)
        images = item.get('images') or []
        for src in images:
            content += '<img src="{}">'.format(src)

        items.append(Entry(
            title=title,
            url='https://daily.zhihu.com/story/{}'.format(item['id']),
            updated=published,
            published=published,
            content=content,
            author=''
        ))

    title = data['name']
    url = 'https://daily.zhihu.com/'
    return Feed(title=title, url=url, items=items)


def format_time(t):
    # the API gives Unix seconds; render in UTC to match the trailing 'Z'
    d = datetime.datetime.utcfromtimestamp(t)
    return d.strftime('%Y-%m-%dT%H:%M:%SZ')

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

requests
werkzeug
beautifulsoup4

--------------------------------------------------------------------------------
/wsgi.py:
--------------------------------------------------------------------------------

from feedme import Application

app = Application()
--------------------------------------------------------------------------------
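
To smoke-test the whole WSGI app without running a server, Werkzeug's test client can drive `app` from wsgi.py directly; a sketch assuming Werkzeug 2.x (the `/one/article` route is real, but it needs network access to the upstream site):

from werkzeug.test import Client
from wsgi import app

client = Client(app)
resp = client.get('/one/article')
# a healthy feed responds 200 with text/xml
print(resp.status_code, resp.headers.get('Content-Type'))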