├── LICENSE ├── README.md ├── config.py.sample ├── indiepaper ├── __init__.py ├── app.py ├── controllers │ ├── __init__.py │ ├── indieauth.py │ └── root.py ├── extract.py ├── indieauth.py ├── tests │ ├── __init__.py │ ├── test_functional.py │ └── test_units.py └── wsgi.py ├── setup.cfg ├── setup.py └── zappa_settings.json.template /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Jonathan LaCour 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Indiepaper 2 | ========== 3 | 4 | A "read later" service for [Micropub](https://indieweb.org/Micropub) and 5 | [Microsub](https://indieweb.org/Microsub) servers. 6 | 7 | Using Indiepaper 8 | ---------------- 9 | 10 | If you spend a lot of your time in an indie reader like 11 | [Together](https://indieweb.org/Together), 12 | [Indigenous](https://indieweb.org/Indigenous), or 13 | [Monocle](https://indieweb.org/Monocle), you may want to save articles into a 14 | "read later" queue. 15 | 16 | Enter Indiepaper! You can send a special HTTP POST request to Indiepaper, and it 17 | will extract the content of any article on the web (ad free!) and publish it to 18 | the Micropub endpoint of your choosing. You can use this functionality with your 19 | Microsub server, such as [Aperture](https://indieweb.org/Aperture), to publish 20 | these articles for later consumption into a special channel. 21 | 22 | Indiepaper is powered by [mf2py](http://microformats.org/wiki/mf2py) and 23 | [Mercury by Postlight Labs](https://mercury.postlight.com/web-parser/) 24 | 25 | 26 | Indiepaper Public Service 27 | ------------------------- 28 | 29 | Indiepaper is available as a hosted service at 30 | [https://indiepaper.io](https://indiepaper.io). If you visit the website, you'll 31 | also find a useful tool to generate you a "Read Later" bookmarklet for saving 32 | articles via Indiepaper, plus links to an iOS Workflow and a native macOS app. 33 | -------------------------------------------------------------------------------- /config.py.sample: -------------------------------------------------------------------------------- 1 | # Server Specific Configurations 2 | server = { 3 | 'port': '9090', 4 | 'host': '0.0.0.0' 5 | } 6 | 7 | # Pecan Application Configurations 8 | app = { 9 | 'root': 'indiepaper.controllers.root.RootController', 10 | 'modules': ['indiepaper'], 11 | 'template_path': '%(confdir)s/indiepaper/templates', 12 | 'debug': True 13 | } 14 | 15 | mercury = { 16 | 'api_key': 'MERCURY_API_KEY_HERE', 17 | 'endpoint': 'https://mercury.postlight.com/parser' 18 | } 19 | 20 | logging = { 21 | 'root': {'level': 'INFO', 'handlers': ['console']}, 22 | 'loggers': { 23 | 'indiepaper': {'level': 'DEBUG', 'handlers': ['console'], 'propagate': False}, 24 | 'pecan': {'level': 'DEBUG', 'handlers': ['console'], 'propagate': False}, 25 | 'py.warnings': {'handlers': ['console']}, 26 | '__force_dict__': True 27 | }, 28 | 'handlers': { 29 | 'console': { 30 | 'level': 'DEBUG', 31 | 'class': 'logging.StreamHandler', 32 | 'formatter': 'color' 33 | } 34 | }, 35 | 'formatters': { 36 | 'simple': { 37 | 'format': ('%(asctime)s %(levelname)-5.5s [%(name)s]' 38 | '[%(threadName)s] %(message)s') 39 | }, 40 | 'color': { 41 | '()': 'pecan.log.ColorFormatter', 42 | 'format': ('%(asctime)s [%(padded_color_levelname)s] [%(name)s]' 43 | '[%(threadName)s] %(message)s'), 44 | '__force_dict__': True 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /indiepaper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cleverdevil/indiepaper/1f27dc1b8a73ff0c8d3c759d3b25595da9c73d5e/indiepaper/__init__.py -------------------------------------------------------------------------------- /indiepaper/app.py: -------------------------------------------------------------------------------- 1 | from pecan import make_app, conf 2 | from beaker.middleware import SessionMiddleware 3 | 4 | 5 | def setup_app(config): 6 | 7 | app_conf = dict(config.app) 8 | 9 | app = make_app( 10 | app_conf.pop('root'), 11 | logging=getattr(config, 'logging', {}), 12 | **app_conf 13 | ) 14 | 15 | app = SessionMiddleware(app, conf.beaker) 16 | 17 | return app 18 | -------------------------------------------------------------------------------- /indiepaper/controllers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cleverdevil/indiepaper/1f27dc1b8a73ff0c8d3c759d3b25595da9c73d5e/indiepaper/controllers/__init__.py -------------------------------------------------------------------------------- /indiepaper/controllers/indieauth.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | from urllib.parse import urlencode 3 | from http import cookies 4 | 5 | from pecan import expose, redirect, request 6 | 7 | from indiepaper import indieauth 8 | 9 | 10 | class IndieAuthController(object): 11 | 12 | @expose() 13 | def index(self, me='', app=False): 14 | if not me: 15 | return 'Must specify a `me` parameter' 16 | 17 | session = request.environ['beaker.session'] 18 | session['me'] = me 19 | session['state'] = str(uuid4()) 20 | 21 | if app: 22 | session['app'] = True 23 | 24 | session.save() 25 | 26 | indieauth.request_authorization(me, session['state']) 27 | 28 | @expose() 29 | def callback(self, code=None, state=None): 30 | session = request.environ['beaker.session'] 31 | try: 32 | assert session['state'] == state 33 | except AssertionError: 34 | return 'Error: state mismatch' 35 | 36 | result = indieauth.request_token(session['me'], code) 37 | if not result: 38 | return 'Error: no token returned from token endpoint' 39 | 40 | target = 'https://www.indiepaper.io/indieauth.html?success=true' 41 | 42 | if session.get('app', False) == True: 43 | target += '&app=true' 44 | 45 | c = cookies.SimpleCookie() 46 | c['indiepaper-me'] = session['me'] 47 | c['indiepaper-me']['domain'] = '.indiepaper.io' 48 | c['indiepaper-me']['path'] = '/' 49 | 50 | c['indiepaper-token'] = result['token'] 51 | c['indiepaper-token']['domain'] = '.indiepaper.io' 52 | c['indiepaper-token']['path'] = '/' 53 | 54 | c['indiepaper-endpoint'] = result['micropub'] 55 | c['indiepaper-endpoint']['domain'] = '.indiepaper.io' 56 | c['indiepaper-endpoint']['path'] = '/' 57 | 58 | headers = [ 59 | ('Set-Cookie', morsel.OutputString()) 60 | for morsel in c.values() 61 | ] 62 | 63 | redirect(target, headers=headers) 64 | -------------------------------------------------------------------------------- /indiepaper/controllers/root.py: -------------------------------------------------------------------------------- 1 | from pecan import expose, redirect, request, abort, conf 2 | from pecan.hooks import HookController, PecanHook 3 | 4 | import requests 5 | 6 | 7 | from indiepaper.extract import parse 8 | from .indieauth import IndieAuthController 9 | 10 | 11 | class CorsHook(PecanHook): 12 | 13 | def after(self, state): 14 | state.response.headers['Access-Control-Allow-Origin'] = '*' 15 | state.response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS' 16 | state.response.headers['Access-Control-Allow-Headers'] = 'origin, authorization, accept, mp-destination, x-indiepaper-destination' 17 | 18 | 19 | class RootController(HookController): 20 | 21 | __hooks__ = [CorsHook()] 22 | 23 | indieauth = IndieAuthController() 24 | 25 | @expose(generic=True) 26 | def index(self): 27 | if request.method == 'GET': 28 | redirect('https://www.indiepaper.io/') 29 | 30 | 31 | @index.when(method='POST', template='json') 32 | def index_post(self, url, category=None): 33 | # get the micropub information from the headers 34 | destination = request.headers.get('x-indiepaper-destination') 35 | if destination is None: 36 | destination = request.headers.get('mp-destination') 37 | token = request.headers.get('Authorization') 38 | 39 | if not destination: 40 | abort(400, detail='No micropub destination specified in "x-indiepaper-destination" header.') 41 | elif not token: 42 | abort(400, detail='No bearer token provided in "Authorization" header.') 43 | elif not url: 44 | abort(400, detail='No URL provided as an HTTP POST parameter.') 45 | 46 | # parse URL 47 | mf2 = parse(url) 48 | 49 | # add categories, if they're specified 50 | if category is not None: 51 | if isinstance(category, list): 52 | mf2['properties']['category'] = category 53 | elif isinstance(category, str): 54 | mf2['properties']['category'] = [category] 55 | 56 | # send micropub request 57 | if mf2: 58 | self._send_micropub(mf2, destination, token) 59 | else: 60 | return dict(result='failure') 61 | 62 | return dict(result='success') 63 | 64 | 65 | def _send_micropub(self, mf2, destination, token): 66 | result = requests.post( 67 | destination, 68 | json=mf2, 69 | headers={'Authorization': token} 70 | ) 71 | 72 | if result.status_code not in (200, 201): 73 | print("-" * 80) 74 | print(result.status_code) 75 | print(result.text) 76 | print("-" * 80) 77 | 78 | abort(400, detail='Failed to publish to specified endpoint.') 79 | -------------------------------------------------------------------------------- /indiepaper/extract.py: -------------------------------------------------------------------------------- 1 | from pecan import conf 2 | 3 | import requests 4 | import mf2py 5 | 6 | 7 | def parse(url): 8 | result = parse_with_mf2py(url) 9 | 10 | if result: 11 | return result 12 | else: 13 | return parse_with_mercury(url) 14 | 15 | 16 | def parse_with_mercury(url): 17 | response = requests.get( 18 | conf.mercury.endpoint, 19 | params={'url': url}, 20 | headers={'x-api-key': conf.mercury.api_key} 21 | ) 22 | 23 | if response.status_code != 200: 24 | return None 25 | 26 | result = response.json() 27 | 28 | mf2 = { 29 | 'type': ['h-entry'], 30 | 'properties': { 31 | 'name': [result['title']], 32 | 'content': [{ 33 | 'html': result['content'], 34 | 'value': '' 35 | }], 36 | 'syndication': [ 37 | result['url'] 38 | ], 39 | 'url': [ 40 | result['url'] 41 | ] 42 | } 43 | } 44 | 45 | if result.get('author'): 46 | mf2['properties']['author'] = [result['author']] 47 | if result.get('date_published'): 48 | mf2['properties']['published'] = [result['date_published']] 49 | 50 | return mf2 51 | 52 | 53 | def parse_with_mf2py(url): 54 | result = mf2py.parse(url=url) 55 | 56 | if not result: 57 | return None 58 | 59 | if len(result.get('items', [])) == 0: 60 | return None 61 | 62 | item = result['items'][0] 63 | 64 | if not item['properties'].get('name'): 65 | return None 66 | 67 | if not item['properties'].get('content'): 68 | return None 69 | 70 | mf2 = { 71 | 'type': ['h-entry'], 72 | 'properties': { 73 | 'name': item['properties']['name'], 74 | 'content': item['properties']['content'], 75 | 'syndication': [ 76 | url 77 | ], 78 | 'url': [ 79 | url 80 | ] 81 | } 82 | } 83 | 84 | if item['properties'].get('author'): 85 | mf2['properties']['author'] = item['properties']['author'] 86 | if item['properties'].get('published'): 87 | mf2['properties']['published'] = item['properties']['published'] 88 | 89 | return mf2 90 | -------------------------------------------------------------------------------- /indiepaper/indieauth.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse, urlencode, parse_qs 2 | 3 | from bs4 import BeautifulSoup, SoupStrainer 4 | from pecan import redirect 5 | 6 | import requests 7 | 8 | 9 | def discover_endpoints(me): 10 | response = requests.get(me) 11 | 12 | all_links = BeautifulSoup( 13 | response.content, 14 | 'html.parser', 15 | parse_only=SoupStrainer(name='link') 16 | ).find_all('link') 17 | 18 | result = {} 19 | 20 | for link in all_links: 21 | rel = link.get('rel', [None])[0] 22 | 23 | if rel in ('authorization_endpoint', 'token_endpoint', 'redirect_uri', 'micropub'): 24 | href = link.get('href', None) 25 | if href: 26 | url = urlparse(href) 27 | if url.scheme in ('http', 'https'): 28 | result.setdefault(rel, set()).add(url) 29 | 30 | return result 31 | 32 | 33 | def request_authorization(me, state): 34 | endpoints = discover_endpoints(me) 35 | 36 | if len(endpoints.get('authorization_endpoint', [])) == 0: 37 | raise Exception('No authorization endpoint discovered.') 38 | 39 | auth_endpoint = endpoints['authorization_endpoint'].pop().geturl() 40 | 41 | ns = { 42 | 'me': me, 43 | 'redirect_uri': 'https://indiepaper.io/indieauth/callback', 44 | 'client_id': 'https://indiepaper.io', 45 | 'state': state, 46 | 'scope': 'save create update', 47 | 'response_type': 'code' 48 | } 49 | auth_target = auth_endpoint + '?' + urlencode(ns) 50 | redirect(auth_target) 51 | 52 | 53 | def request_token(me, code): 54 | endpoints = discover_endpoints(me) 55 | 56 | if len(endpoints.get('token_endpoint', [])) == 0: 57 | raise Exception('No token endpoint discovered.') 58 | 59 | if len(endpoints.get('micropub', [])) == 0: 60 | raise Exception('No micropub endpoint discovered.') 61 | 62 | token_endpoint = endpoints['token_endpoint'].pop().geturl() 63 | micropub_endpoint = endpoints['micropub'].pop().geturl() 64 | 65 | response = requests.post(token_endpoint, data={ 66 | 'grant_type': 'authorization_code', 67 | 'me': me, 68 | 'code': code, 69 | 'redirect_uri': 'https://indiepaper.io/indieauth/callback', 70 | 'client_id': 'https://indiepaper.io' 71 | }) 72 | 73 | if response.status_code != 200: 74 | raise Exception( 75 | 'Error returned from token endpoint: ' + str(response.status_code) 76 | ) 77 | 78 | try: 79 | data = response.json() 80 | return { 81 | 'token': data.get('access_token'), 82 | 'micropub': micropub_endpoint 83 | } 84 | except: 85 | return { 86 | 'token': parse_qs(response.text).get('access_token', [None])[0], 87 | 'micropub': micropub_endpoint 88 | } 89 | -------------------------------------------------------------------------------- /indiepaper/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | from pecan import set_config 4 | from pecan.testing import load_test_app 5 | 6 | __all__ = ['FunctionalTest'] 7 | 8 | 9 | class FunctionalTest(TestCase): 10 | """ 11 | Used for functional tests where you need to test your 12 | literal application and its integration with the framework. 13 | """ 14 | 15 | def setUp(self): 16 | self.app = load_test_app(os.path.join( 17 | os.path.dirname(__file__), 18 | 'config.py' 19 | )) 20 | 21 | def tearDown(self): 22 | set_config({}, overwrite=True) 23 | -------------------------------------------------------------------------------- /indiepaper/tests/test_functional.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from webtest import TestApp 3 | from indiepaper.tests import FunctionalTest 4 | 5 | 6 | class TestRootController(FunctionalTest): 7 | 8 | def test_get(self): 9 | response = self.app.get('/') 10 | assert response.status_int == 200 11 | 12 | def test_search(self): 13 | response = self.app.post('/', params={'q': 'RestController'}) 14 | assert response.status_int == 302 15 | assert response.headers['Location'] == ( 16 | 'https://pecan.readthedocs.io/en/latest/search.html' 17 | '?q=RestController' 18 | ) 19 | 20 | def test_get_not_found(self): 21 | response = self.app.get('/a/bogus/url', expect_errors=True) 22 | assert response.status_int == 404 23 | -------------------------------------------------------------------------------- /indiepaper/tests/test_units.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | 4 | class TestUnits(TestCase): 5 | 6 | def test_units(self): 7 | assert 5 * 5 == 25 8 | -------------------------------------------------------------------------------- /indiepaper/wsgi.py: -------------------------------------------------------------------------------- 1 | from pecan.deploy import deploy 2 | app = deploy('config.py') 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | match=^test 3 | where=indiepaper 4 | nocapture=1 5 | cover-package=indiepaper 6 | cover-erase=1 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | try: 3 | from setuptools import setup, find_packages 4 | except ImportError: 5 | from ez_setup import use_setuptools 6 | use_setuptools() 7 | from setuptools import setup, find_packages 8 | 9 | setup( 10 | name='indiepaper', 11 | version='0.1.0', 12 | description='A "read later" service for micropub endpoints.', 13 | author='Jonathan LaCour', 14 | author_email='jonathan@cleverdevil.org', 15 | install_requires=[ 16 | "pecan", 17 | "requests", 18 | "zappa", 19 | "bs4", 20 | "beaker", 21 | "mf2py" 22 | ], 23 | test_suite='indiepaper', 24 | zip_safe=False, 25 | include_package_data=True, 26 | packages=find_packages(exclude=['ez_setup']) 27 | ) 28 | -------------------------------------------------------------------------------- /zappa_settings.json.template: -------------------------------------------------------------------------------- 1 | { 2 | "dev": { 3 | "app_function": "indiepaper.wsgi.app", 4 | "aws_region": "us-east-1", 5 | "profile_name": "default", 6 | "project_name": "indiepaper", 7 | "runtime": "python3.6", 8 | "s3_bucket": "indiepaper-zappa-dev-bucket", 9 | "keep_warm": false 10 | }, 11 | "production": { 12 | "app_function": "indiepaper.wsgi.app", 13 | "aws_region": "us-east-1", 14 | "profile_name": "default", 15 | "project_name": "indiepaper", 16 | "runtime": "python3.6", 17 | "s3_bucket": "indiepaper-zappa-prod-bucket", 18 | "certificate_arn": "AWS_CERT_ARN_HERE", 19 | "domain": "DEPLOYMENT_DOMAIN_HERE", 20 | "keep_warm": false, 21 | "binary_support": false 22 | } 23 | } 24 | --------------------------------------------------------------------------------