├── LICENCE ├── README.md ├── crontab ├── doc ├── emissary2.png ├── emissary3.png ├── emissary4.png └── emissary5.png ├── emissary ├── __init__.py ├── client.py ├── config.py ├── controllers │ ├── __init__.py │ ├── cron.py │ ├── fetch.py │ ├── load.py │ ├── log.py │ ├── manager.py │ ├── parser.py │ ├── scripts.py │ ├── tui.py │ └── utils.py ├── models.py ├── repl.py ├── resources │ ├── __init__.py │ ├── api_key.py │ ├── articles.py │ ├── feedgroups.py │ └── feeds.py └── run.py ├── scripts └── hello.py └── setup.py /LICENCE: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person 2 | obtaining a copy of this software and associated documentation 3 | files (the "Software"), to deal in the Software without 4 | restriction, including without limitation the rights to use, 5 | copy, modify, merge, publish, distribute, sublicense, and/or sell 6 | copies of the Software, and to permit persons to whom the 7 | Software is furnished to do so, subject to the following 8 | conditions: 9 | 10 | The above copyright notice and this permission notice shall be 11 | included in all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 15 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 16 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 17 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 | OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Emissary 2 | ======== 3 | 4 | An intelligence utility / test for researchers, programmers and generally carnivorous primates who want personally curated news archives. 5 | Emissary is a web content extractor that has a RESTful API and the ability to run pre-store scripts. 6 | Emissary stores the full text of linked articles from RSS feeds or URLs containing links. 7 | 8 | Documentation lives [here](http://docs.psybernetics.org/). 9 | 10 | -------- 11 | ![Alt text](doc/emissary4.png?raw=true "ncurses Client") 12 | ![Alt text](doc/emissary3.png?raw=true "Feed Groups") 13 | ![Alt text](doc/emissary2.png?raw=true "Articles") 14 |
15 | 
16 | Installation requires the Python interpreter headers and the development headers for libevent, libxml2 and libxslt.
17 | Optional article compression requires libsnappy. 
18 | All of these can be obtained on debian-based systems with:
19 | sudo apt-get install -y zlib1g-dev libxml2-dev libxslt1-dev python-dev libevent-dev libsnappy-dev
20 | 
21 | You're then ready to install the package for all users:
22 | sudo python setup.py install
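If you'd rather not install system-wide, a virtualenv works too (a sketch, assuming virtualenv is already installed):
virtualenv env && . env/bin/activate
python setup.py install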
23 | 
24 | 
25 |  Usage: python -m emissary.run 
26 | 
27 |   -h, --help            show this help message and exit
28 |   -c, --crontab         Crontab to parse
29 |   --config              (defaults to emissary.config)
30 |   -a, --address         (defaults to 0.0.0.0)
31 |   -p, --port            (defaults to 6362)
32 |   --export              Write the existing database as a crontab
33 |   --key                 SSL key file
34 |   --cert                SSL certificate
35 |   --pidfile             (defaults to ./emissary.pid)
36 |   --logfile             (defaults to ./emissary.log)
37 |   --stop                
38 |   --debug               Log to stdout
39 |   -d                    Run in the background
40 |   --run-as              (defaults to the invoking user)
41 |   --scripts-dir         (defaults to ./scripts/)
42 | 
43 | 
44 | Some initial setup has to be done before the system will start.
45 | Communication with Emissary is mainly done over HTTPS connections
46 | and for that you're going to need an SSL certificate and a key:
47 | 
48 | user@host $ openssl genrsa 4096 > key
49 | user@host $ openssl req -new -x509 -nodes -sha256 -days 365 -key key > cert
50 | 
51 | To keep your API keys from ever ending up in version control for all
52 | the world to see, you need to put a database URI into the environment:
53 | 
54 | export EMISSARY_DATABASE="sqlite://///home/you/.emissary.db"
55 | 
56 | Protip: Put that last line in your shell's rc file.
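For example, with bash (a sketch; adjust the path to taste):
echo 'export EMISSARY_DATABASE="sqlite://///home/you/.emissary.db"' >> ~/.bashrc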
57 | 
58 | Start an instance in the foreground to obtain your first API key:
59 | 
60 | user@host $ python -m emissary.run --cert cert --key key
61 | 14/06/2015 16:31:30 - Emissary - INFO - Starting Emissary 2.0.0.
62 | e5a59e0a-b457-45c6-9d30-d983419c43e1
63 | ^That UUID is your Primary API key. Add it to this example crontab:
64 | 
65 | user@host $ cat feeds.txt
66 | apikey: your-api-key-here
67 | 
68 | # url                                                 name            group            minute  hour    day     month   weekday
69 | http://news.ycombinator.com/rss                       "HN"            "HN"             */15    *       *       *       *
70 | http://phys.org/rss-feed/                             "Phys.org"      "Phys.org"       1       12      *       *       *
71 | http://feeds.nature.com/news/rss/most_recent          "Nature"        "Nature"         30      13      *       *       *
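The last five fields follow standard cron order (minute, hour, day, month, weekday).
Going by the parser in emissary/controllers/cron.py, `*/15` means every fifteen minutes
and a trailing `!` (as in the bundled crontab's `20!`) is shorthand for the same step
syntax; comma lists, `a-b` ranges and named ranges like `mon-fri` or `jan-jun` are also
accepted. A hypothetical line using these forms:

http://example.com/rss                                "Example"       "Examples"       */10    9-17    *       *       mon-fri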
72 | 
73 | user@host $ python -m emissary.run -c feeds.txt
74 | Using API key "Primary".
75 | Primary: Creating feed group HN.
76 | Primary: HN: Creating feed "HN"
77 | 
78 | Emissary supports multiple apikey directives in one crontab.
79 | Subsequent feed definitions are associated with the most recently declared key, as sketched below.
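A sketch with placeholder keys and feed URLs:

apikey: first-key-here
http://example.com/alpha.rss                          "Alpha"         "Alpha"          */30    *       *       *       *

apikey: second-key-here
http://example.com/beta.rss                           "Beta"          "Beta"           0       6       *       *       *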
80 | 
81 | Start an instance in the background and connect to it:
82 | user@host $ python -m emissary.run -d --cert cert --key key
83 | user@host $ python -m emissary.repl
84 | Emissary 2.0.0
85 | Psybernetics 2015
86 | 
87 | (3,204) > help
88 | 
89 | 
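The REST interface is also available programmatically through emissary.client.Client,
which the bundled ncurses client drives internally. A minimal sketch, assuming an
instance on the default port with the self-signed certificate generated above
(hence verify=False):

```python
from emissary.client import Client

# Requests return (json, status_code) tuples from .get()/.post()/.put()/.delete().
c = Client("your-api-key-here", "https://localhost:6362/v1/", verify=False)

articles, status = c.get("articles")
if status == 200:
    for article in articles["data"]:
        print article["title"]
```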
90 | 91 | If the prospect of creating an NSA profile of your reading habits is 92 | something that rightfully bothers you then my advice is to subscribe 93 | to many things and then use Emissary to read the things that really 94 | interest you. 95 | 96 | ![Alt text](doc/emissary5.png?raw=true "ncurses programmatic access") 97 | -------------------------------------------------------------------------------- /crontab: -------------------------------------------------------------------------------- 1 | apikey: your-api-key-here 2 | 3 | # url name group minute hour day month weekday 4 | http://news.ycombinator.com/rss "HN" "HN" 20! * * * * 5 | http://mf.feeds.reuters.com/reuters/UKdomesticNews "Reuters UK" "Reuters" 0 3! * * * 6 | -------------------------------------------------------------------------------- /doc/emissary2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LukeB42/Emissary/31629a8baedc91a9b60c551a01b2b45372b9a8c7/doc/emissary2.png -------------------------------------------------------------------------------- /doc/emissary3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LukeB42/Emissary/31629a8baedc91a9b60c551a01b2b45372b9a8c7/doc/emissary3.png -------------------------------------------------------------------------------- /doc/emissary4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LukeB42/Emissary/31629a8baedc91a9b60c551a01b2b45372b9a8c7/doc/emissary4.png -------------------------------------------------------------------------------- /doc/emissary5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LukeB42/Emissary/31629a8baedc91a9b60c551a01b2b45372b9a8c7/doc/emissary5.png -------------------------------------------------------------------------------- /emissary/__init__.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # 3 | # The structure of this package is essentially as follows 4 | # 5 | # models.py Our abstractions for the types of data we persist to a database, 6 | # including how to represent columns and joins on other tables as singular 7 | # JSON documents. Handy for building list comprehensions of models. 8 | # resources/ RESTful API endpoints for interacting with models over HTTP 9 | # controllers/ Miscellaneous utilities used throughout the whole project 10 | # run.py A runner program that inserts a database schema if none is present, 11 | # binds to a network interface and changes UID if asked. 12 | # repl.py An interactive read-eval-print loop for working with the REST interface. 13 | # config.py Defines how to obtain a database URI. 14 | """ 15 | A democracy thing for researchers, programmers and news junkies who want personally curated news archives. 16 | Emissary is a web content extractor that has a RESTful API and a scripting system. 17 | Emissary stores the full text of linked articles from RSS feeds or URLs containing links. 
18 | """ 19 | 20 | from pkgutil import extend_path 21 | __path__ = extend_path(__path__, __name__) 22 | __all__ = ["client", "controllers", "models", "resources", "run", "repl"] 23 | 24 | import time 25 | from flask import Flask 26 | from flask.ext import restful 27 | from flask.ext.sqlalchemy import SQLAlchemy 28 | from multiprocessing import Queue, cpu_count 29 | from sqlalchemy.engine.reflection import Inspector 30 | 31 | app = Flask("emissary") 32 | 33 | # This config is the default and can be overridden by 34 | # using options.config in run.py (python -m emissary.run -c somefile.py) 35 | app.config.from_object("emissary.config") 36 | 37 | app.version = "2.1.1" 38 | app.inbox = Queue() 39 | app.scripts = None 40 | app.feedmanager = None 41 | app.config["HTTP_BASIC_AUTH_REALM"] = "Emissary " + app.version 42 | 43 | 44 | # These are response queues that enable the main thread of execution to 45 | # share data with the REST interface. Mainly for reporting the status of crontabs. 46 | app.queues = [] 47 | for i in range(cpu_count() * 2): 48 | q = Queue() 49 | q.access = time.time() 50 | app.queues.append(q) 51 | 52 | db = SQLAlchemy(app) 53 | api = restful.Api(app, prefix='/v1') 54 | 55 | def init(): 56 | # Models are imported here to prevent a circular import where we would 57 | # import models and the models would import that db object just above us. 58 | 59 | # They're also imported here in this function because they implicitly 60 | # monkey-patch the threading module, and we might not need that if all we want 61 | # from the namespace is something like app.version, like in repl.py for example. 62 | from models import APIKey 63 | from models import FeedGroup 64 | from models import Feed 65 | from models import Article 66 | from models import Event 67 | 68 | from resources import api_key 69 | from resources import feeds 70 | from resources import feedgroups 71 | from resources import articles 72 | 73 | api.add_resource(api_key.KeyCollection, "/keys") 74 | api.add_resource(api_key.KeyResource, "/keys/") 75 | 76 | api.add_resource(feedgroups.FeedGroupCollection, "/feeds") 77 | api.add_resource(feedgroups.FeedGroupResource, "/feeds/") 78 | api.add_resource(feedgroups.FeedGroupStop, "/feeds//stop") 79 | api.add_resource(feedgroups.FeedGroupStart, "/feeds//start") 80 | api.add_resource(feedgroups.FeedGroupArticles, "/feeds//articles") 81 | api.add_resource(feedgroups.FeedGroupSearch, "/feeds//search/") 82 | api.add_resource(feedgroups.FeedGroupCount, "/feeds//count") 83 | 84 | api.add_resource(feeds.FeedResource, "/feeds//") 85 | api.add_resource(feeds.FeedArticleCollection, "/feeds///articles") 86 | api.add_resource(feeds.FeedSearch, "/feeds///search/") 87 | api.add_resource(feeds.FeedStartResource, "/feeds///start") 88 | api.add_resource(feeds.FeedStopResource, "/feeds///stop") 89 | 90 | api.add_resource(articles.ArticleCollection, "/articles") 91 | api.add_resource(articles.ArticleResource, "/articles/") 92 | api.add_resource(articles.ArticleSearch, "/articles/search/") 93 | api.add_resource(articles.ArticleCount, "/articles/count") 94 | 95 | # Create the database schema if it's not already laid out. 
96 | inspector = Inspector.from_engine(db.engine) 97 | tables = [table_name for table_name in inspector.get_table_names()] 98 | 99 | if 'api_keys' not in tables: 100 | db.create_all() 101 | master = models.APIKey(name = app.config['MASTER_KEY_NAME']) 102 | if app.config['MASTER_KEY']: master.key = app.config['MASTER_KEY'] 103 | else: master.key = master.generate_key_str() 104 | print master.key 105 | master.active = True 106 | db.session.add(master) 107 | db.session.commit() 108 | -------------------------------------------------------------------------------- /emissary/client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pprint 3 | import json 4 | import cmd 5 | import os 6 | os.environ['no_proxy'] = '127.0.0.1,localhost' 7 | requests.packages.urllib3.disable_warnings() 8 | 9 | class Client(object): 10 | def __init__(self, key, base_url, verify=True, timeout=2.500): 11 | self.key = key 12 | self.base = base_url 13 | pp = pprint.PrettyPrinter(indent=4) 14 | self.p = pp.pprint 15 | self.verify_https = verify 16 | self.timeout = timeout 17 | 18 | # Defining a username manually on your client objects will 19 | # permit you to use the .can() shortcut for determining 20 | # the username's access rights. 21 | self.username = None 22 | 23 | if not self.base.endswith('/'): 24 | self.base += '/' 25 | 26 | def _send_request(self, url, type='GET', body={}, headers={}): 27 | headers['Authorization'] = "Basic %s" % self.key 28 | url = self.base+url 29 | resp = None 30 | if type=='GET': 31 | resp = requests.get(url, verify=self.verify_https, 32 | headers=headers, timeout=self.timeout) 33 | elif type=='DELETE': 34 | resp = requests.delete(url, verify=self.verify_https, 35 | data=body, headers=headers, timeout=self.timeout) 36 | elif type=='PUT': 37 | resp = requests.put(url, verify=self.verify_https, 38 | data=body, headers=headers, timeout=self.timeout) 39 | elif type=='POST': 40 | resp = requests.post(url, verify=self.verify_https, 41 | data=body, headers=headers, timeout=self.timeout) 42 | try: return resp.json(), resp.status_code 43 | except: return {}, resp.status_code 44 | 45 | def get(self, url, body={}, headers={}): 46 | return self._send_request(url, body=body, headers=headers) 47 | 48 | def put(self, url, body={}, headers={}): 49 | return self._send_request(url, type='PUT', body=body, headers=headers) 50 | 51 | def post(self, url, body={}, headers={}): 52 | return self._send_request(url, type='POST', body=body, headers=headers) 53 | 54 | def delete(self, url, body={}, headers={}): 55 | return self._send_request(url, type='DELETE', body=body, headers=headers) 56 | 57 | def pp(self, url, type='GET', body={}, headers={}): 58 | self.p(self._send_request(url, type, body, headers)) 59 | 60 | def keys(self, type='GET', body={}, headers={}): 61 | return self._send_request("keys", type, body, headers) 62 | 63 | def __repr__(self): 64 | return "" % self.base 65 | -------------------------------------------------------------------------------- /emissary/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | if not 'EMISSARY_DATABASE' in os.environ: 3 | print 'You need to export a URI for EMISSARY_DATABASE' 4 | print 'Eg: export EMISSARY_DATABASE="sqlite://///home/%s/.emissary.db"' % getpass.getuser() 5 | raise SystemExit 6 | else: 7 | SQLALCHEMY_DATABASE_URI = ( 8 | os.environ['EMISSARY_DATABASE'] 9 | ) 10 | 11 | MASTER_KEY = None 12 | MASTER_KEY_NAME = "Primary" 13 | 
PERMIT_NEW = False 14 | GZIP_HERE = True 15 | COMPRESS_ARTICLES = True 16 | ENABLE_CORS = False 17 | if "NO_DUPLICATE_TITLES" in os.environ: 18 | NO_DUPLICATE_TITLES = os.environ['DUPLICATE_TITLES'] 19 | else: 20 | NO_DUPLICATE_TITLES = True 21 | -------------------------------------------------------------------------------- /emissary/controllers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LukeB42/Emissary/31629a8baedc91a9b60c551a01b2b45372b9a8c7/emissary/controllers/__init__.py -------------------------------------------------------------------------------- /emissary/controllers/cron.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # From http://stackoverflow.com/questions/373335/suggestions-for-a-cron-like-scheduler-in-python 3 | import gevent 4 | import time, sys 5 | from datetime import datetime, timedelta 6 | 7 | class CronError(Exception): 8 | def __init__(self, message): 9 | self.message = message 10 | def __str__(self): 11 | return repr(self.message) 12 | 13 | class days: 14 | mon = 0 15 | tue = 1 16 | wed = 2 17 | thu = 3 18 | fri = 4 19 | sat = 5 20 | sun = 6 21 | 22 | class months: 23 | jan = 1 24 | feb = 2 25 | mar = 3 26 | apr = 4 27 | may = 5 28 | jun = 6 29 | jul = 7 30 | aug = 8 31 | sep = 9 32 | oct = 10 33 | nov = 11 34 | dec = 12 35 | 36 | # Turn a list of timing data into raw numeric values 37 | def parse_timings(timings): 38 | # minute hour day month weekday 39 | # 0 6,12 * 0-11 mon-sun 40 | # Currently contains off by one errors. 41 | if type(timings) == str: 42 | timings = timings.split() 43 | if len(timings) != 5: 44 | print len(timings), timings 45 | raise CronError('Timings require five fields.') 46 | minute = hour = day = month = weekday = [] 47 | if timings[0] == '*': minute = allMatch # range(0,60) 48 | if timings[1] == '*': hour = allMatch # range(0,24) 49 | if timings[2] == '*': day = allMatch # range(0,32) 50 | if timings[3] == '*': month = allMatch # range(0,12) 51 | if timings[4] == '*': weekday = allMatch # range(0,7) 52 | for i, v in enumerate(timings): 53 | if len(v) < 3: 54 | try: 55 | r = int(v) 56 | if i == 0: minute = [r] 57 | if i == 1: hour = [r] 58 | if i == 2: day = [r] 59 | if i == 3: month = [r] 60 | if i == 4: weekday = [r] 61 | except: 62 | pass 63 | if ',' in v: # TODO: Incorporate lists of days and months. 
64 | t = v.split(',') 65 | x=[] 66 | for f in t: 67 | x.append(int(f)) 68 | if i == 0: minute = x 69 | if i == 1: hour = x 70 | if i == 2: day = x 71 | if i == 3: month = x 72 | if i == 4: weekday = x 73 | del t,f,x 74 | if v.endswith("!") or v.startswith("*/"): 75 | s = "" 76 | for j in v: 77 | if j.isdigit(): 78 | s += j 79 | s = int(s) 80 | if i == 0: minute = range(0,60,s) 81 | if i == 1: hour = range(0,24,s) 82 | if i == 2: day = range(0,32,s) 83 | if i == 3: month = range(0,12,s) 84 | if i == 4: weekday = range(0,7,s) 85 | if '-' in v and len(v) > 2: 86 | r = v.split('-') 87 | for n,m in enumerate(r): 88 | try: 89 | r[n] = int(m) 90 | except: 91 | pass 92 | if type(r[n]) == int: 93 | if i == 0: minute = range(r[0],int(r[1])+1) 94 | if i == 1: hour = range(r[0],int(r[1])+1) 95 | if i == 2: day = range(r[0],int(r[1])+1) 96 | if i == 3: month = range(r[0],int(r[1])+1) 97 | if i == 4: weekday = range(r[0],int(r[1])+1) 98 | continue 99 | else: 100 | start = stop = None 101 | if i == 3: # Months 102 | if hasattr(months,r[0]): 103 | start = getattr(months,r[0]) 104 | if hasattr(months,r[1]): 105 | stop = getattr(months,r[1]) 106 | if (start and stop) != None: 107 | month = range(start,stop+1) 108 | del start, stop 109 | else: 110 | raise CronError('Malformed month data.') 111 | if i == 4: # Weekdays 112 | if hasattr(days,r[0]): 113 | start = getattr(days,r[0]) 114 | if hasattr(days,r[1]): 115 | stop = getattr(days,r[1]) 116 | if (start and stop) != None: 117 | weekday = range(start,stop+1) 118 | del start, stop 119 | else: 120 | raise CronError('Malformed day-of-the-week data.') 121 | del v,i,r,n,m, 122 | return minute, hour, day, month, weekday 123 | 124 | def parse_crontab_line(line,lineno=None,tcpd=False): 125 | url=line.split()[0] 126 | f=line.split()[1:] 127 | for i,w in enumerate(f): 128 | if w.endswith("'"): break 129 | name = ' '.join(f[:i+1]).strip("'") 130 | timings = ' '.join(f[i+1:]) # Minutes Hour Day Month Weekday 131 | parse_timings(timings) 132 | if not tcpd: 133 | if lineno: 134 | print "Line %s. 
%s: %s %s" % (lineno,name,url,timings) 135 | else: 136 | print "%s: %s %s" % (name,url,timings) 137 | return (url,name,timings) 138 | 139 | # Some utility classes / functions first 140 | class AllMatch(set): 141 | """Universal set - match everything""" 142 | def __contains__(self, item): return True 143 | 144 | allMatch = AllMatch() 145 | 146 | def conv_to_set(obj): # Allow single integer to be provided 147 | if isinstance(obj, (int,long)): 148 | return set([obj]) # Single item 149 | if not isinstance(obj, set): 150 | obj = set(obj) 151 | return obj 152 | 153 | class Event(object): 154 | def __init__(self, action, min=allMatch, hour=allMatch, 155 | day=allMatch, month=allMatch, dow=allMatch, 156 | args=(), kwargs={}): 157 | self.mins = conv_to_set(min) 158 | self.hours= conv_to_set(hour) 159 | self.days = conv_to_set(day) 160 | self.months = conv_to_set(month) 161 | self.dow = conv_to_set(dow) 162 | self.action = action 163 | self.args = args 164 | self.kwargs = kwargs 165 | self.running = False 166 | self.name = None 167 | 168 | def matchtime(self, t): 169 | """Return True if this event should trigger at the specified datetime""" 170 | return ((t.minute in self.mins) and 171 | (t.hour in self.hours) and 172 | (t.day in self.days) and 173 | (t.month in self.months) and 174 | (t.weekday() in self.dow)) 175 | 176 | def check(self, t): 177 | if self.matchtime(t): 178 | self.running = True 179 | self.action(*self.args, **self.kwargs) 180 | self.running = False 181 | 182 | class CronTab(gevent.Greenlet): 183 | def __init__(self, *events): 184 | self.events = events 185 | self.name = None 186 | gevent.Greenlet.__init__(self) 187 | 188 | def _run(self): 189 | t=datetime(*datetime.now().timetuple()[:5]) 190 | while 1: 191 | for e in self.events: 192 | # print zip([i for i in dir(self)], [getattr(self,i) for i in dir(self)]) 193 | if self.inbox: # This .get() blocks, preventing duplicate greenlets running 194 | msg = self.inbox.get() # in the same addr due to our use of multiprocessing.Process 195 | e.check(t) 196 | t += timedelta(minutes=1) 197 | n = datetime.now() 198 | while n < t: 199 | s = (t - n).seconds + 1 200 | time.sleep(s) 201 | n = datetime.now() 202 | 203 | def __repr__(self): 204 | if self.name: 205 | return "" % (self.name, hex(id(self))) 206 | else: 207 | return "" % hex(id(self)) 208 | 209 | def parse_crontab(db,log): 210 | table = db['feeds'] 211 | 212 | crontab = sys.stdin.read() 213 | feedlines={} 214 | 215 | for index, line in enumerate(crontab.split('\n')): 216 | if line.startswith('http'): 217 | index+=1 218 | feedlines['%s' % index] = line 219 | elif (line.startswith('#')) or (line == ''): continue 220 | else: print Utils.parse_option(line,config) 221 | 222 | for lineno, feedline in feedlines.items(): 223 | url=name=timings=None 224 | try: 225 | (url,name,timings) = Cron.parse_crontab_line(feedline,lineno) 226 | except EmissaryError, e: 227 | print e 228 | 229 | if url and name and timings: 230 | # Check URL isn't already loaded 231 | feed = Feed.Feed(db,log,url=url) 232 | if 'name' in feed.feed.keys(): 233 | if name != feed['name'] or timings != feed['timings']: 234 | feed.adjust(name,timings) 235 | sys.stdout.write("Adjusted %s: %s\n" % (name,feed.feed)) 236 | else: 237 | sys.stdout.write('Adding %s\n' % name) 238 | feed = Feed.Feed(db,log).create(name,url,timings) 239 | 240 | raise SystemExit 241 | 242 | #if __name__ == '__main__': 243 | # c = CronTab(Event(lambda x: print "Hello", range(0,59), range(0,23), dow=range(0,5))) 244 | # c.run() 245 | 246 | 
-------------------------------------------------------------------------------- /emissary/controllers/fetch.py: -------------------------------------------------------------------------------- 1 | import time 2 | import urlparse 3 | import requests 4 | import feedparser 5 | from emissary import app, db 6 | from sqlalchemy import and_, or_ 7 | from emissary.models import Article 8 | from emissary.controllers import parser 9 | from emissary.controllers.utils import uid, tconv 10 | requests.packages.urllib3.disable_warnings() 11 | 12 | snappy = None 13 | if app.config['COMPRESS_ARTICLES']: 14 | try: 15 | import snappy 16 | except ImportError: 17 | pass 18 | 19 | 20 | # This is a little globally-available (as far as coroutines calling this are concerned) 21 | # dictionary of urls we've already visited. It permits us to only try a url 22 | # four times every half an hour. If we see it again after half an hour we'll 23 | # try it again, otherwise it stays in the seen dictionary. It also needs periodically 24 | # emptying, lest it grow infinitely. 25 | seen = {} 26 | 27 | def get(url): 28 | headers = {"User-Agent": "Emissary "+ app.version} 29 | return requests.get(url, headers=headers, verify=False) 30 | 31 | # Fetch a feed.url, parse the links, visit the links and store articles. 32 | def fetch_feed(feed, log): 33 | 34 | if feed.group: 35 | log("%s: %s: Fetching %s." % \ 36 | (feed.key.name, feed.group.name, feed.name)) 37 | else: 38 | log("%s: Fetching %s." % (feed.key.name, feed.name)) 39 | try: 40 | r = get(feed.url) 41 | except Exception, e: 42 | log("%s: %s: Error fetching %s: %s" % \ 43 | (feed.key.name, feed.group.name, feed.name, e.message[0])) 44 | return 45 | 46 | # Fetch the links and create articles 47 | links = parser.extract_links(r) 48 | title = None 49 | for link in links: 50 | # try: 51 | fetch_and_store(link, feed, log) 52 | # except Exception, e: 53 | # log("%s: %s: Error with %s: %s" % \ 54 | # (feed.key.name, feed.name, link, e.message), "error") 55 | 56 | def fetch_and_store(link, feed, log, key=None, overwrite=False): 57 | """ 58 | Fetches, extracts and stores a URL. 59 | link can be a list of urls or a dictionary of url/title pairs. 60 | """ 61 | then = int(time.time()) 62 | # If the feed was XML data then we probably have a dictionary of 63 | # url:title pairs, otherwise we have a list of urls. 64 | if type(link) == dict: 65 | for url, title in link.items(): continue 66 | else: 67 | url = link 68 | title = None 69 | 70 | # Skip this url if we've already extracted and stored it for this feed, unless we're overwriting. 71 | if Article.query.filter(and_(Article.url == url, Article.feed == feed)).first(): 72 | if overwrite: 73 | log("%s: %s/%s: Preparing to overwrite existing copy of %s" % \ 74 | (feed.key.name, feed.group.name, feed.name, url), "debug") 75 | else: 76 | log("%s: %s/%s: Already storing %s" % (feed.key.name, feed.group.name, feed.name, url), "debug") 77 | return 78 | 79 | # Fix links with no schema 80 | if not "://" in url: 81 | url = "http://" + url 82 | 83 | # Store our awareness of this url during this run in a globally available dictionary, 84 | # in the form [counter, timestamp]. 85 | if url not in seen: 86 | seen[url] = [1, int(time.time())] 87 | else: 88 | # If we haven't modified the counter for half an hour, reset it. 89 | now = int(time.time()) 90 | if (now - seen[url][1]) > 60*30: 91 | seen[url] = [1, int(time.time())] 92 | # If we have tried this URL four times, disregard it. 93 | # We might reset its counter in half an hour anyway. 
94 | if seen[url][0] >= 4: 95 | return 96 | # Otherwise increment and continue with storing. 97 | seen[url][0] += 1 98 | seen[url][1] = int(time.time()) 99 | 100 | # Prune seen URLs older than a day. 101 | for _ in seen.copy(): 102 | if int(time.time()) - seen[_][1] > 86400: 103 | del seen[_] 104 | 105 | try: 106 | document = get(url) 107 | except Exception, e: 108 | log("%s: %s/%s: Error fetching %s: %s" % \ 109 | (feed.key.name, feed.group.name, feed.name, url, e.message[0])) 110 | return 111 | 112 | # Mimetype detection. 113 | if 'content-type' in document.headers: 114 | if 'application' in document.headers['content-type']: 115 | if not title: 116 | title = url 117 | article = Article( 118 | url=url, 119 | title=title, 120 | ) 121 | if not "://" in article.url: 122 | article.url = "http://" + article.url 123 | commit_to_feed(feed, article) 124 | log("%s: %s/%s: Stored %s, reference to %s (%s)" % \ 125 | (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type'])) 126 | return 127 | 128 | # Document parsing. 129 | try: 130 | article_content = parser.extract_body(document.text) 131 | summary = parser.summarise(article_content) 132 | except Exception, e: 133 | log("%s: %s: Error parsing %s: %s" % (feed.key.name, feed.group.name, url, e.message)) 134 | return 135 | 136 | # Ensure a title and disregard dupes 137 | if not title: 138 | title = parser.extract_title(document.text) 139 | 140 | if app.config['NO_DUPLICATE_TITLES']: 141 | if Article.query.filter( 142 | and_(Article.title == title, Article.key == feed.key) 143 | ).first(): 144 | return 145 | 146 | # Initial article object 147 | article = Article( 148 | url=url, 149 | title=title, 150 | summary=summary 151 | ) 152 | 153 | # Determine whether to store the full content or a compressed copy 154 | if not app.config['COMPRESS_ARTICLES']: 155 | article.content=article_content 156 | else: 157 | article.ccontent = snappy.compress(article_content.encode("utf-8", "ignore")) 158 | article.compressed = True 159 | 160 | # 161 | # We execute scripts before committing articles to the database 162 | # it runs the risk of a singular script halting the entire thing 163 | # in return we get to modify articles (ie machine translation) before storing. 164 | 165 | # Non-blocking IO will result in the most reliable performance within your scripts. 166 | # 167 | for s in app.scripts.scripts.values(): 168 | try: 169 | s.execute(env={'article':article, 'feed':feed}) 170 | article = s['article'] 171 | except Exception, e: 172 | log("Error executing %s: %s" % (s.file, e.message), "error") 173 | 174 | commit_to_feed(feed, article) 175 | 176 | now = int(time.time()) 177 | duration = tconv(now-then) 178 | log('%s: %s/%s: Stored %s "%s" (%s)' % \ 179 | (feed.key.name, feed.group.name, feed.name, article.uid, article.title, duration)) 180 | del then, now, duration, feed, article, url, title 181 | return 182 | 183 | def fetch_feedless_article(key, url, overwrite=False): 184 | """ 185 | Given a URL, create an Article and attach it to a Key. 186 | """ 187 | then = int(time.time()) 188 | log = app.log 189 | 190 | if Article.query.filter(Article.url == url).first(): 191 | if overwrite: 192 | log("%s: Preparing to overwrite existing copy of %s" % (key.name,url), "debug") 193 | else: 194 | log("%s: Already storing %s" % (key.name, url), "debug") 195 | return 196 | 197 | try: 198 | response = get(url) 199 | except Exception, e: 200 | log("%s: Error fetching %s: %s." 
% (key.name, url, e.message)) 201 | return 202 | 203 | article_content = parser.extract_body(response.text) 204 | title = parser.extract_title(response.text) 205 | summary = parser.summarise(article_content) 206 | article = Article( 207 | url=url, 208 | title=title, 209 | summary=summary 210 | ) 211 | 212 | if not app.config['COMPRESS_ARTICLES']: 213 | article.content = article_content 214 | else: 215 | article.ccontent = snappy.compress(article_content.encode("utf-8", "ignore")) 216 | article.compress = True 217 | 218 | for s in app.scripts.scripts.values(): 219 | try: 220 | s.execute(env={'article':article, 'feed':None}) 221 | article = s['article'] 222 | except Exception, e: 223 | log("Error executing %s: %s" % (s.file, e.message), "error") 224 | 225 | key.articles.append(article) 226 | 227 | article.uid = uid() 228 | 229 | db.session.add(article) 230 | db.session.add(key) 231 | db.session.commit() 232 | 233 | now = int(time.time()) 234 | duration = tconv(now-then) 235 | log('%s: Stored %s "%s" (%s)' % (key.name, article.uid, article.title, duration)) 236 | return article 237 | 238 | def commit_to_feed(feed, article): 239 | """ 240 | Place a new article on the api key of a feed, the feed itself, 241 | and commit changes. 242 | """ 243 | 244 | # We give articles UIDs manually to ensure unique time data is used. 245 | article.uid = uid() 246 | 247 | session = feed._sa_instance_state.session 248 | feed.articles.append(article) 249 | feed.key.articles.append(article) 250 | 251 | session.add(article) 252 | session.add(feed) 253 | session.commit() 254 | del article, feed, session 255 | -------------------------------------------------------------------------------- /emissary/controllers/load.py: -------------------------------------------------------------------------------- 1 | # This file contains functions designed for 2 | # loading cron tables and storing new feeds. 3 | 4 | from emissary import db 5 | from sqlalchemy import and_ 6 | from emissary.controllers.utils import spaceparse 7 | from emissary.controllers.cron import parse_timings 8 | from emissary.models import APIKey, Feed, FeedGroup 9 | 10 | def create_feed(log, db, key, group, feed): 11 | """ 12 | Takes a key object, a group name and a dictionary 13 | describing a feed ({name:,url:,schedule:,active:}) 14 | and reliably attaches a newly created feed to the key 15 | and group. 16 | """ 17 | if not type(feed) == dict: 18 | log('Unexpected type when creating feed for API key "%s"' % key.name) 19 | return 20 | 21 | for i in ['name', 'schedule', 'active', 'url']: 22 | if not i in feed.keys(): 23 | log('%s: Error creating feed. Missing "%s" field from feed definition.' % (key.name, i)) 24 | return 25 | 26 | f = Feed.query.filter(and_(Feed.key == key, Feed.name == feed['name'])).first() 27 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == group)).first() 28 | 29 | if f: 30 | if f.group: 31 | log('%s: Error creating feed "%s" in group "%s", feed already exists in group "%s".' % \ 32 | (key.name, feed['name'], group, f.group.name)) 33 | return 34 | elif fg: 35 | log('%s: %s: Adding feed "%s"' % (key.name, fg.name, f.name)) 36 | fg.append(f) 37 | db.session.add(fg) 38 | db.session.add(f) 39 | db.session.commit() 40 | return 41 | 42 | if not fg: 43 | log('%s: Creating feed group %s.' 
% (key.name, group)) 44 | fg = FeedGroup(name=group) 45 | key.feedgroups.append(fg) 46 | 47 | try: 48 | parse_timings(feed['schedule']) 49 | except Exception, e: 50 | log('%s: %s: Error creating "%s": %s' % \ 51 | (key.name, fg.name, feed['name'], e.message)) 52 | 53 | log('%s: %s: Creating feed "%s"' % (key.name, fg.name, feed['name'])) 54 | f = Feed( 55 | name=feed['name'], 56 | url=feed['url'], 57 | active=feed['active'], 58 | schedule=feed['schedule'] 59 | ) 60 | fg.feeds.append(f) 61 | key.feeds.append(f) 62 | db.session.add(key) 63 | db.session.add(fg) 64 | db.session.add(f) 65 | db.session.commit() 66 | 67 | def parse_crontab(filename): 68 | """ 69 | Get a file descriptor on filename and 70 | create feeds and groups for API keys therein. 71 | """ 72 | def log(message): 73 | print message 74 | # read filename into a string named crontab 75 | try: 76 | fd = open(filename, "r") 77 | except OSError: 78 | print "Error opening %s" % filename 79 | raise SystemExit 80 | crontab = fd.read() 81 | fd.close() 82 | 83 | # keep a resident api key on hand 84 | key = None 85 | 86 | for i, line in enumerate(crontab.split('\n')): 87 | 88 | # Set the APIKey we're working with when we find a line starting 89 | # with apikey: 90 | if line.startswith("apikey:"): 91 | if ' ' in line: 92 | key_str = line.split()[1] 93 | key = APIKey.query.filter(APIKey.key == key_str).first() 94 | if not key: 95 | print 'Malformed or unknown API key at line %i in %s: %s' % (i+1, filename, line) 96 | raise SystemExit 97 | else: 98 | print 'Using API key "%s".' % key.name 99 | 100 | if line.startswith("http"): 101 | feed = {'active': True} 102 | 103 | # Grab the URL and set the string to the remainder 104 | feed['url'] = line.split().pop(0) 105 | line = ' '.join(line.split()[1:]) 106 | 107 | # Grab names and groups 108 | names = spaceparse(line) 109 | if not names: 110 | print "Error parsing feed or group name at line %i in %s: %s" % (i+1, filename, line) 111 | continue 112 | feed['name'], group = names[:2] 113 | 114 | # The schedule should be the last five items 115 | schedule = line.split()[-5:] 116 | try: 117 | parse_timings(schedule) 118 | except Exception, e: 119 | print "Error parsing schedule at line %i in %s: %s" % (i+1, filename, e.message) 120 | continue 121 | 122 | feed['schedule'] = ' '.join(schedule) 123 | 124 | create_feed(log, db, key, group, feed) 125 | -------------------------------------------------------------------------------- /emissary/controllers/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file provides a generic logging class. 3 | It could do with automatic file rotation and syslog support. 4 | 5 | Luke Brooks 2015 6 | MIT License. 
7 | """ 8 | import logging, time 9 | 10 | class Log(object): 11 | def __init__(self, program, log_file=None, log_stdout=False): 12 | self.program = program 13 | self.log = None 14 | self.debug = False 15 | 16 | if log_file or log_stdout: 17 | formatter = logging.Formatter( 18 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s', '%d/%m/%Y %H:%M:%S' 19 | ) 20 | self.log = logging.getLogger(program) 21 | self.log.setLevel(logging.DEBUG) 22 | 23 | if log_stdout: 24 | ch = logging.StreamHandler() 25 | ch.setLevel(logging.DEBUG) 26 | ch.setFormatter(formatter) 27 | self.log.addHandler(ch) 28 | 29 | if log_file: 30 | ch = logging.FileHandler(log_file, 'a') 31 | ch.setLevel(logging.DEBUG) 32 | ch.setFormatter(formatter) 33 | self.log.addHandler(ch) 34 | 35 | def __call__(self, data, level='info'): 36 | if self.log: 37 | if level == 'debug': level = 10 38 | if level == 'info': level = 20 39 | if level == 'warning': level = 30 40 | if level == 'error': level = 40 41 | if level == 'critical': level = 50 42 | 43 | if (level > 15) or (self.debug): 44 | self.log.log(level,data) 45 | -------------------------------------------------------------------------------- /emissary/controllers/manager.py: -------------------------------------------------------------------------------- 1 | from gevent.queue import Queue 2 | import sys, os, time, pwd, optparse, gevent, hashlib 3 | 4 | from sqlalchemy import and_ 5 | from emissary.models import Feed, FeedGroup, APIKey 6 | from emissary.controllers import cron 7 | from emissary.controllers import fetch 8 | 9 | class EmissaryError(Exception): 10 | def __init__(self, message): 11 | self.message = message 12 | def __str__(self): 13 | return repr(self.message) 14 | 15 | class FeedManager(object): 16 | """Keeps CronTab objects in rotation""" 17 | def __init__(self, log): 18 | self.log = log 19 | self.app = None 20 | self.running = False 21 | self.crontabs = {} 22 | self.threads = [] 23 | self.revived = {} # {name: [amt, time]} 24 | 25 | def load_feeds(self): 26 | """ 27 | Currently just starts all feeds flat, by checking if they and their 28 | FeedGroup are active. 29 | 30 | 31 | TODO: Start feeds by API key. Where each CronTab corresponds to a FeedGroup. 32 | """ 33 | for key in APIKey.query.all(): 34 | 35 | if key.reader: 36 | continue 37 | 38 | if not key.active: 39 | self.log('API key "%s" marked inactive. Skipped.' % (key.name)) 40 | continue 41 | 42 | self.log("%s: Processing feed groups." % key.name) 43 | for fg in key.feedgroups: 44 | 45 | if not fg.active: 46 | self.log('%s: Feed group "%s" marked inactive. Skipped.' % \ 47 | (key.name, fg.name)) 48 | continue 49 | 50 | for feed in fg.feeds: 51 | if not feed.active: 52 | self.log('%s:%s: Feed "%s" marked inactive. Skipped.' % \ 53 | (key.name, fg.name, feed.name)) 54 | continue 55 | 56 | self.log('%s: %s: Scheduling "%s" (%s)' % \ 57 | (key.name, fg.name, feed.name, feed.schedule)) 58 | 59 | ct = self.create_crontab(feed) 60 | g = gevent.spawn(ct.run) 61 | g.name = ct.name 62 | self.threads.append(g) 63 | name = self.generate_ct_name(feed) 64 | self.crontabs[name] = ct 65 | 66 | def run(self): 67 | """ 68 | Receive inbox messages and revive feeds. 69 | Also block duplicate crontab execution..... 70 | 71 | The reason we do this is due to a quirk of 72 | using Gevent with multiprocessing.Process. 73 | 74 | It's why obtaining the article count in the REPL prompt 75 | takes a second, but the tradeoff is that Emissary won't 76 | overutilise your CPU in this loop. 
77 | 78 | If you run a greenlet in a subprocess we end up with 79 | CronTab greenlets executing twice but in the same address space... 80 | So I've settled on this solution for now after investigating GIPC, 81 | which works with Flask's built in httpd, but that's not as nimble 82 | as gevent.WSGIServer. 83 | """ 84 | self.running = True 85 | while self.running: 86 | while not self.app.inbox.empty(): 87 | self.receive(self.app.inbox.get(block=False)) 88 | # Run feeds 89 | gevent.sleep() 90 | for ct in self.crontabs.values(): 91 | if ct.inbox.empty(): 92 | ct.inbox.put("ping") 93 | # Check if revive needed 94 | self.revive(ct) 95 | for i in self.threads: 96 | if i.started == False: 97 | self.threads.remove(i) 98 | # the sleep for 50ms keeps cpu utilisation low 99 | gevent.sleep() 100 | time.sleep(0.05) 101 | self.log("Cleaning up..") 102 | 103 | def create_crontab(self, feed): 104 | t = cron.parse_timings(feed.schedule.split()) 105 | evt = cron.Event( # One possible design for these crontabs 106 | fetch.fetch_feed, # is to have them correspond to a FeedGroup 107 | t[0], t[1], t[2], t[3], t[4],# where each event is a member feed 108 | [feed, self.log]) # and stopping the crontab stops the group. 109 | evt.feed = feed 110 | ct = cron.CronTab(evt) 111 | ct.name = self.generate_ct_name(feed) 112 | ct.inbox = Queue() 113 | return ct 114 | 115 | def generate_ct_name(self, feed): 116 | """ 117 | Generate a crontab name from a feed object that's 118 | hopefully unique between multiple feeds in multiple groups 119 | on multiple API keys. 120 | 121 | Determining the feed.key.key string here proved to be too expensive, 122 | so instead it's trusted that the name and creation time are unique enough. 123 | 124 | Improvements to this implementation are most welcome. 125 | """ 126 | return hashlib.sha1("%s %s" % (feed.name, feed.created)).hexdigest() 127 | 128 | def revive(self, ct): 129 | """ 130 | Restart a dead crontab. 131 | Permit a ceiling amount of restarts. 132 | Only restart a feed once per minute. 133 | """ 134 | if ct.name in self.revived: 135 | now = time.time() 136 | then = self.revived[ct.name][1] 137 | if (now - then) < 60: 138 | return 139 | self.revived[ct.name][0] += 1 140 | self.revived[ct.name][1] = now 141 | else: 142 | self.revived[ct.name] = [1, time.time()] 143 | 144 | if ct.started == False: 145 | feed = ct.events[0].feed 146 | ct = self.create_crontab(feed) 147 | self[ct.name] = ct 148 | gevent.spawn(ct.run) 149 | # if feed.name in self.crontabs.keys(): 150 | # self.log("Restarting %s" % ct.name, "warning") 151 | 152 | # name = self.generate_ct_name(feed) 153 | # self.crontabs[name] = ct 154 | # self.log(self.crontabs) 155 | 156 | def receive(self, payload): 157 | """ 158 | The Feed manager is an actor with an inbox that responds to commands 159 | issued by the HTTPD process. We accept a list containing a queue ID 160 | a command name that corresponds to FeedManager.handle_ and 161 | arguments, even if it's just a None. 
162 | """ 163 | if len(payload) < 3 or type(payload) != list: return 164 | qid, command, args = payload 165 | func = getattr(self, "handle_" + command, None) 166 | # Execute on messages with a Queue ID of zero without emitting a response 167 | if func and not qid: return(func(args)) 168 | # Otherwise, use response queues based on access times 169 | elif func: 170 | # We do a double comparison here in order to sort the queue out of the loop 171 | q = [q for q in self.app.queues if hex(id(q)) == qid] 172 | if not q: 173 | self.log("Couldn't find response queue at %s." % id) 174 | return 175 | q=q[0] 176 | # Put our response on the queue and rotate its priority. 177 | try: 178 | q.put(func(args)) 179 | except Exception,e: 180 | self.app.log(e.message,'warning') 181 | q.access = time.time() 182 | self.app.queues.sort(key=lambda q: q.access, reverse=True) 183 | return 184 | return 185 | 186 | def handle_check(self, feed): 187 | """ 188 | Return whether we have a feed running or not. 189 | """ 190 | name = self.generate_ct_name(feed) 191 | if name in self.crontabs and self.crontabs[name].started: 192 | return True 193 | return False 194 | 195 | def handle_start(self, args): 196 | """ 197 | Schedule a feed. 198 | 199 | We look the feed up here because for some reason freshly 200 | created ones aren't great at journeying over IPC queues. 201 | """ 202 | key, name = args 203 | feed = Feed.query.filter(and_(Feed.key == key, Feed.name == name)).first() 204 | if not feed: return 205 | 206 | self.app.log('%s: %s: Scheduling "%s" (%s)' % \ 207 | (key.name, feed.group.name, feed.name, feed.schedule)) 208 | ct = self.create_crontab(feed) 209 | self.crontabs[ct.name] = ct 210 | g = gevent.spawn(ct.run) 211 | g.name = ct.name 212 | self.threads.append(g) 213 | return True 214 | 215 | def handle_stop(self, args): 216 | """ 217 | Halt a feed. 218 | 219 | We can't look the feed up from the database here because we may have 220 | already deleted it from our records, so instead we iterate through 221 | all of our green threads until something sticks. 222 | """ 223 | key, name = args 224 | 225 | for id, ct in self.crontabs.items(): 226 | feed = ct.events[0].feed 227 | if feed.name == name and feed.key.key == key.key: 228 | if self.app.debug: 229 | self.app.log('%s: %s: Unscheduling "%s". [thread %s]' % \ 230 | (key.name, feed.group.name, feed.name, id)) 231 | else: 232 | self.app.log('%s: %s: Unscheduling "%s".' 
% \ 233 | (key.name, feed.group.name, feed.name)) 234 | for t in self.threads: 235 | if t.name == id: 236 | gevent.kill(t) 237 | break 238 | self.threads.remove(t) 239 | del ct 240 | del self.crontabs[id] 241 | return True 242 | return False 243 | 244 | def __setitem__(self, name, crontab): 245 | if name in self.crontabs.keys(): 246 | if crontab.name: 247 | self.log("Restarting %s" % crontab.name, "warning") 248 | else: 249 | self.log("Restarting %s" % name, "warning") 250 | crontab.name = name 251 | self.crontabs[name] = crontab 252 | gevent.spawn(crontab) 253 | 254 | def __getitem__(self, name): 255 | if name in self.crontabs.keys(): 256 | return self.crontabs[name] 257 | else: 258 | raise KeyError('Invalid CronTab') 259 | 260 | def __delitem__(self, name): 261 | """Halt crontab, delete""" 262 | if name in self.crontabs.keys(): 263 | self.crontabs[name].kill() 264 | del self.crontabs[name] 265 | 266 | def keys(self): 267 | return self.crontabs.keys() 268 | -------------------------------------------------------------------------------- /emissary/controllers/parser.py: -------------------------------------------------------------------------------- 1 | # This file implements routines for extracting links from response objects. 2 | import re 3 | import lxml 4 | import urlparse 5 | import feedparser 6 | # We have sought to disperse power, to set men and women free. 7 | # That really means: to help them to discover that they are free. 8 | # Everybody's free. The slave is free. 9 | # The ultimate weapon isn't this plague out in Vegas, or any new super H-bomb. 10 | # The ultimate weapon has always existed. Every man, every woman, and every child owns it. 11 | # It's the ability to say No and take the consequences. 12 | # 'Fear is failure.' 'The fear of death is the beginning of slavery.' 13 | # "Thou hast no right but to do thy will.' 14 | # The goose can break the bottle at any second. 15 | # Socrates took the hemlock to prove it. 16 | # Jesus went to the cross to prove it. 17 | # It's in all history, all myth, all poetry. 18 | # It's right out in the open all the time." 19 | from goose import Goose 20 | 21 | def extract_links(response): 22 | urls = [] 23 | if ('content-type' in response.headers.keys()) and ('xml' in response.headers['content-type']): 24 | f = feedparser.parse(response.text) 25 | for entry in f.entries: 26 | urls.append({entry.link: entry.title}) 27 | del f 28 | else: # The following is a highly experimental feature. 29 | url = urlparse.urlparse(response.url) 30 | url = url.scheme + "://" + url.netloc 31 | p = Parser(response.text, url=url) 32 | urls = p.parse() 33 | del url, p 34 | return urls 35 | 36 | class Parser(object): 37 | """ 38 | Build a list of relevant links from an HTML string and the root URL. 39 | 40 | p = Parser(html_text, root_url) 41 | urls = p.parse() 42 | """ 43 | def __init__(self,html=None,doc=None,url=None): 44 | self.html=html 45 | self.doc=doc 46 | try: self.url = urlparse.urlparse(url).netloc 47 | except: self.url = url 48 | self.links=[] 49 | 50 | def root_to_urls(self, doc, titles): 51 | """ 52 | Return a list of urls from an lxml root. 
53 | """ 54 | if doc is None: 55 | return [] 56 | 57 | a_tags = doc.xpath('//a') 58 | # tries to find titles of link elements via tag text 59 | if titles: 60 | return [ (a.get('href'), a.text) for a in a_tags if a.get('href') ] 61 | return [ a.get('href') for a in a_tags if a.get('href') ] 62 | 63 | def get_urls(self,_input=None,titles=False,regex=False): 64 | if (not _input) and (not self.html): return [] 65 | if not _input: _input = self.html 66 | if regex: 67 | text = re.sub('<[^<]+?>', ' ', _input) 68 | text = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', _input) 69 | text = [i.strip() for i in _input] 70 | return _input or [] 71 | if isinstance(_input, str) or isinstance(_input, unicode): 72 | doc = self.fromstring(_input) 73 | else: 74 | doc = text 75 | return self.root_to_urls(doc, titles) 76 | 77 | def fromstring(self, html): 78 | try: 79 | self.doc = lxml.html.fromstring(html) 80 | except Exception, e: 81 | return None 82 | return self.doc 83 | 84 | def parse(self,html=None,url=None): 85 | """ 86 | Whittle a list of urls into things we're interested in. 87 | """ 88 | if self.links: self.links=[] 89 | urls = self.get_urls(html) 90 | if not urls: return urls 91 | else: urls = set(urls) 92 | if url: url = "http://%s/" % urlparse.urlparse(url).netloc 93 | for u in urls: 94 | if url: 95 | if u == url: continue 96 | if self.url: 97 | if u == self.url: continue 98 | if u.startswith('#'): continue 99 | if not u.startswith('http'): 100 | if url: 101 | if (url[-1] == '/') and (u[0] == '/'): u = url + u[1:] 102 | else: u = url+u 103 | elif self.url: 104 | if (self.url[-1] == '/') and (u[0] == '/'): u = self.url + u[1:] 105 | else: u = self.url+u 106 | else: continue 107 | self.links.append(u) 108 | return self.links 109 | 110 | def extract_body(html): 111 | """ 112 | Extract the body text of a web page 113 | """ 114 | g = Goose({'enable_image_fetching':False}) 115 | article = g.extract(raw_html=html) 116 | del g 117 | return article.cleaned_text 118 | 119 | def extract_title(html): 120 | """ 121 | Extract the body title of a web page 122 | """ 123 | g = Goose({'enable_image_fetching':False}) 124 | article = g.extract(raw_html=html) 125 | del g 126 | return article.title 127 | 128 | 129 | def summarise(article): 130 | stopnum = c = 0 131 | for i,v in enumerate(article.split()): 132 | if v.endswith('.'): 133 | if c >= 2: 134 | stopnum = i+1 135 | break 136 | else: 137 | c += 1 138 | return ' '.join(article.split()[:stopnum]) 139 | 140 | -------------------------------------------------------------------------------- /emissary/controllers/scripts.py: -------------------------------------------------------------------------------- 1 | #! _*_ coding: utf-8 _*_ 2 | # This file provides scripting capabilities 3 | import os 4 | from emissary import app 5 | from emissary.controllers.utils import sha1sum 6 | 7 | class Scripts(object): 8 | 9 | def __init__(self, dir): 10 | self.dir = None 11 | self.scripts = {} 12 | 13 | dir = os.path.abspath(dir) 14 | if not os.path.isdir(dir): 15 | app.log("%s isn't a valid system path." 
% dir, "error") 16 | return 17 | 18 | self.dir = dir 19 | 20 | def reload(self, *args): # args caught for SIGHUP handler 21 | 22 | if self.dir: 23 | if self.scripts: 24 | app.log("Reloading scripts.") 25 | for file in os.listdir(self.dir): 26 | self.unload(file) 27 | self.load(file) 28 | 29 | def load(self, file): 30 | 31 | file = os.path.abspath(os.path.join(self.dir, file)) 32 | 33 | for script in self.scripts.values(): 34 | if script.file == file: return 35 | 36 | if os.path.isfile(file): 37 | self.scripts[file] = Script(file) 38 | app.log("Loaded %s" % file) 39 | 40 | def unload(self, file): 41 | file = os.path.abspath(os.path.join(self.dir, file)) 42 | 43 | if file in self.scripts: 44 | del self.scripts[file] 45 | 46 | class Script(object): 47 | """ 48 | Represents the execution environment for a third-party script. 49 | We send custom values into the environment and work with whatever's left. 50 | Scripts can also call any methods on objects put in their environment. 51 | """ 52 | def __init__(self, file=None, env={}): 53 | self.read_on_exec = app.debug 54 | self.file = file 55 | self.env = env 56 | self.script = '' 57 | self.code = None 58 | self.hash = None 59 | self.cache = { 60 | 'app': app 61 | } 62 | 63 | def execute(self, env={}): 64 | if not self.code or self.read_on_exec: self.compile() 65 | if env: self.env = env 66 | self.env['cache'] = self.cache 67 | exec self.code in self.env 68 | del self.env['__builtins__'] 69 | if 'cache' in self.env.keys(): 70 | self.cache = self.env['cache'] 71 | return (self.env) 72 | 73 | def compile(self, script=''): 74 | if self.file: 75 | f = file(self.file, 'r') 76 | self.script = f.read() 77 | f.close() 78 | elif script: 79 | self.script = script 80 | if self.script: 81 | hash = sha1sum(self.script) 82 | if self.hash != hash: 83 | self.hash = hash 84 | self.code = compile(self.script, '', 'exec') 85 | self.script = '' 86 | 87 | def __getitem__(self, key): 88 | if key in self.env.keys(): 89 | return (self.env[key]) 90 | else: 91 | raise (KeyError(key)) 92 | 93 | def keys(self): 94 | return self.env.keys() 95 | -------------------------------------------------------------------------------- /emissary/controllers/tui.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from emissary.controllers.utils import tconv 4 | from window import Window, Pane, ALIGN_LEFT, EXPAND, palette 5 | 6 | class EmissaryMenu(Pane): 7 | """ 8 | Defines a menu where items call local methods. 9 | """ 10 | geometry = [EXPAND, EXPAND] 11 | # Default and selection colours. 
12 | col = [-1, -1] # fg, bg 13 | sel = [-1, "blue"] 14 | items = [] 15 | 16 | def update(self): 17 | for i, item in enumerate(self.items): 18 | if item[0]: 19 | colours = palette(self.sel[0], self.sel[1]) 20 | else: 21 | colours = palette(self.col[0], self.col[1]) 22 | text = ' ' + item[1] 23 | spaces = ' ' * (self.width - len(text)) 24 | text += spaces 25 | self.change_content(i, text + '\n', ALIGN_LEFT, colours) 26 | 27 | def process_input(self, character): 28 | # Handle the return key and the right arrow key 29 | if character == 10 or character == 13 or character == 261: 30 | for i, item in enumerate(self.items): 31 | if item[0]: 32 | func = getattr(self, item[2].lower(), None) 33 | if func: 34 | func() 35 | 36 | # Handle navigating the menu 37 | elif character in [259, 258, 339, 338]: 38 | for i, item in enumerate(self.items): 39 | if item[0]: 40 | if character == 259: # up arrow 41 | if i == 0: break 42 | item[0] = 0 43 | self.items[i-1][0] = 1 44 | break 45 | if character == 258: # down arrow 46 | if i+1 >= len(self.items): break 47 | item[0] = 0 48 | self.items[i+1][0] = 1 49 | break 50 | if character == 339: # page up 51 | item[0] = 0 52 | self.items[0][0] = 1 53 | break 54 | if character == 338: # page down 55 | item[0] = 0 56 | self.items[-1][0] = 1 57 | break 58 | 59 | class FeedGroups(EmissaryMenu): 60 | geometry = [EXPAND, EXPAND] 61 | def update(self): 62 | if not self.items: 63 | (res, status) = self.window.c.get("feeds") 64 | 65 | 66 | class Feeds(EmissaryMenu): 67 | geometry = [EXPAND, EXPAND] 68 | items = [] 69 | 70 | 71 | class Articles(Pane): 72 | """ 73 | items for Articles are [1, "text", "uid"] 74 | """ 75 | geometry = [EXPAND, EXPAND] 76 | items = [] 77 | col = [-1, -1] # fg, bg 78 | sel = ["black", "white"] 79 | avail = ["black", "green"] 80 | 81 | def update(self): 82 | if not self.items: 83 | self.fetch_items() 84 | 85 | for i, item in enumerate(self.items): 86 | if item[0]: 87 | if item[3]: 88 | colours = palette(self.avail[0], self.avail[1]) 89 | else: 90 | colours = palette(self.sel[0], self.sel[1]) 91 | else: 92 | colours = palette(self.col[0], self.col[1]) 93 | text = ' ' + item[1] 94 | spaces = ' ' * (self.width - len(text)) 95 | text += spaces 96 | self.change_content(i, text + '\n', ALIGN_LEFT, colours) 97 | 98 | def process_input(self, character): 99 | # Handle the return key and the right arrow key 100 | if character in [10, 13, 261]: 101 | for i, item in enumerate(self.items): 102 | if item[0]: 103 | uid = item[2] 104 | (article, status) = self.window.c.get('articles/' + uid) 105 | statuspane = self.window.get("status") 106 | 107 | if status != 200: 108 | statuspane.status = str(status) 109 | else: 110 | self.reader.article = article 111 | if article['content'] == None: 112 | self.reader.data = "" 113 | else: 114 | self.reader.data = article['content'].encode("ascii", "ignore") 115 | self.reader.active = True 116 | self.active = False 117 | 118 | elif character == 114: # r to refresh 119 | self.fetch_items() 120 | 121 | elif character == 9: # tab to reader 122 | reader = self.window.get("reader") 123 | reader.active = True 124 | self.active = False 125 | 126 | # Handle navigating the menu 127 | elif character in [259, 258, 339, 338]: 128 | for i, item in enumerate(self.items): 129 | if item[0]: 130 | if character == 259: # up arrow 131 | if i == 0: break 132 | item[0] = 0 133 | self.items[i-1][0] = 1 134 | break 135 | if character == 258: # down arrow 136 | if i+1 >= len(self.items): break 137 | item[0] = 0 138 | self.items[i+1][0] = 1 139 | break 140 
| if character == 339: # page up 141 | item[0] = 0 142 | self.items[0][0] = 1 143 | break 144 | if character == 338: # page down 145 | item[0] = 0 146 | self.items[-1][0] = 1 147 | break 148 | 149 | def fetch_items(self): 150 | (res, status) = self.window.c.get("articles?per_page=%i" % self.height) 151 | if status == 200: 152 | self.fill_menu(res) 153 | else: 154 | status = self.window.get("status") 155 | status.status = str(res) 156 | 157 | def fill_menu(self, res): 158 | self.items = [] 159 | self.content = [] 160 | for r in res["data"]: 161 | self.items.append([0, r['title'].encode("ascii", "ignore"), r['uid'], r['content_available']]) 162 | if self.items: 163 | self.items[0][0] = 1 164 | 165 | class Reader(Pane): 166 | """ 167 | Defines a scrolling pager for long multi-line strings. 168 | """ 169 | geometry = [EXPAND, EXPAND] 170 | data = "" 171 | outbuffer = "" 172 | position = 0 173 | article = None 174 | 175 | def update(self): 176 | if self.article: 177 | feed = self.article.get('feed', None) 178 | heading = "%s\n%s (%s %s ago)\n%s\n\n" % \ 179 | (self.article['title'].encode("ascii","ignore"), feed if feed else "", 180 | self.article['uid'], tconv(int(time.time()) - int(self.article['created'])), 181 | self.article['url']) 182 | self.change_content(0, heading) 183 | self.outbuffer = self.data.split('\n')[self.position:] 184 | self.change_content(1, '\n'.join(self.outbuffer)) 185 | 186 | def process_input(self, character): 187 | self.window.window.clear() 188 | if character == 259: # Up arrow 189 | if self.position != 0: 190 | self.position -= 1 191 | elif character == 258: # Down arrow 192 | self.position += 1 193 | elif character == 339: # Page up 194 | if self.position - self.height < 0: 195 | self.position = 0 196 | else: 197 | self.position -= self.height 198 | elif character == 338: # Page down 199 | if not self.position + self.height > len(self.data.split('\n')): 200 | self.position += self.height 201 | 202 | elif character in [260, 9]: # Left arrow or tab 203 | articles = self.window.get("articles") 204 | articles.active = True 205 | self.active = False 206 | 207 | elif character in [70, 102]: # f/F to fullscreen the pager 208 | articles = self.window.get("articles") 209 | if articles.hidden: 210 | articles.hidden = False 211 | else: 212 | articles.hidden = True 213 | 214 | class StatusLine(Pane): 215 | geometry = [EXPAND, 1] 216 | content = [] 217 | buffer = "" 218 | status = "" 219 | searching = False 220 | tagline = "Thanks God." 221 | 222 | def update(self): 223 | if self.searching: 224 | self.change_content(0, "/"+self.buffer, palette("black", "white")) 225 | else: 226 | state = self.tagline 227 | state += ' ' * ((self.width /2) - len(self.tagline) - (len(str(self.status))/2)) 228 | state += str(self.status) 229 | self.change_content(0, state) 230 | 231 | def process_input(self, character): 232 | self.window.window.clear() 233 | if not self.searching and character in [80, 112]: # p/P to enter a python REPL 234 | try: # You might need to 235 | import pprint # "sudo pip install ptpython" 236 | from ptpython.repl import embed # to enable this feature. 237 | 238 | def configure(repl): 239 | repl.prompt_style = "ipython" 240 | repl.vi_mode = True 241 | repl.confirm_exit = False 242 | repl.show_status_bar = False 243 | repl.show_line_numbers = True 244 | repl.show_sidebar_help = False 245 | repl.highlight_matching_parenthesis = True 246 | repl.use_code_colorscheme("native") 247 | 248 | def a(uid): 249 | """ 250 | Return raw article text given an article uid. 
251 | """ 252 | response = self.window.c.get("articles/%s" % uid) 253 | if response[1] == 200: 254 | return response[0]['content'] 255 | return "" 256 | 257 | p = pprint.PrettyPrinter() 258 | p = p.pprint 259 | l = {"a": a, "c": self.window.c, "p": p, "window": self.window} 260 | reader = self.window.get("reader") 261 | article = getattr(reader, "article", None) 262 | if article: 263 | l['article'] = article 264 | 265 | self.window.stop() 266 | print("\nStarting REPL. ^D to exit.") 267 | embed(locals=l, configure=configure) 268 | self.window.start() 269 | except ImportError: 270 | pass 271 | 272 | if not self.searching and character == 47: # / to search 273 | articles = self.window.get("articles") 274 | articles.active = False 275 | self.searching = True 276 | return 277 | 278 | if self.searching: 279 | self.window.window.clear() 280 | if character == 23 and self.buffer: # Clear buffer on ^W 281 | self.buffer = '' 282 | elif character == 263: # Handle backspace 283 | if self.buffer: 284 | self.buffer = self.buffer[:-1] 285 | if not self.buffer: 286 | self.searching = False 287 | articles = self.window.get("articles") 288 | articles.active = True 289 | 290 | elif character == 10 or character == 13: # Handle the return key 291 | # Pass control back to the articles view 292 | self.searching = False 293 | articles = self.window.get("articles") 294 | articles.active = True 295 | reader = self.window.get("reader") 296 | reader.active = False 297 | self.buffer = "" 298 | else: 299 | try: self.buffer += chr(character) # Append input to buffer 300 | except: pass 301 | # Perform a search for what's in the current buffer. 302 | articles = self.window.get("articles") 303 | url = "articles/search/"+self.buffer+"?per_page=" + str(articles.height) 304 | (res, status) = self.window.c.get(url) 305 | if status == 200: 306 | articles.fill_menu(res) 307 | 308 | 309 | window = Window(blocking=True) 310 | 311 | feedgroups = FeedGroups("feedgroups") 312 | feedgroups.active = False 313 | feedgroups.hidden = True 314 | feeds = Feeds("feeds") 315 | feeds.active = False 316 | feeds.hidden = True 317 | articles = Articles("articles") 318 | reader = Reader("reader") 319 | reader.wrap = True 320 | reader.active = False 321 | articles.reader = reader 322 | status = StatusLine("status") 323 | 324 | panes = [feedgroups, feeds, articles, reader] 325 | window.add(panes) 326 | window.add(status) 327 | 328 | window.exit_keys.append(4) # ^D to exit 329 | -------------------------------------------------------------------------------- /emissary/controllers/utils.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # This file defines a nifty utility for querying the database, 3 | # gzipping requests thanks to a snippet on pocoo.org and unique ID generation. 
4 | import gzip 5 | import uuid 6 | import urllib 7 | import hashlib 8 | import urlparse 9 | import functools 10 | from emissary import app, db 11 | from sqlalchemy import or_, and_ 12 | from cStringIO import StringIO as IO 13 | from flask import after_this_request, request 14 | from emissary.controllers.cron import parse_timings 15 | 16 | def sha1sum(text): 17 | return(hashlib.sha1(text).hexdigest()) 18 | 19 | def cors(f): 20 | if not 'ENABLE_CORS' in app.config or not app.config['ENABLE_CORS']: 21 | return f 22 | 23 | @functools.wraps(f) 24 | def view_func(*args, **kwargs): 25 | @after_this_request 26 | def enable_cors(response): 27 | response.headers['Access-Control-Allow-Headers'] = "Cache-Control, Pragma, Origin, Authorization, Content-Type, X-Requested-With, Accept" 28 | response.headers['Access-Control-Allow-Methods'] = "OPTIONS, GET, POST, PUT, DELETE" 29 | response.headers['Access-Control-Allow-Origin'] = "*" 30 | 31 | return response 32 | 33 | return f(*args, **kwargs) 34 | 35 | return view_func 36 | 37 | def gzipped(f): 38 | if not 'GZIP_HERE' in app.config or not app.config['GZIP_HERE']: 39 | return f 40 | 41 | @functools.wraps(f) 42 | def view_func(*args, **kwargs): 43 | 44 | @after_this_request 45 | def zipper(response): 46 | accept_encoding = request.headers.get('Accept-Encoding', '') 47 | 48 | if 'gzip' not in accept_encoding.lower(): 49 | return response 50 | 51 | response.direct_passthrough = False 52 | 53 | if (response.status_code < 200 or 54 | response.status_code >= 300 or 55 | 'Content-Encoding' in response.headers): 56 | return response 57 | gzip_buffer = IO() 58 | gzip_file = gzip.GzipFile(mode='wb', 59 | fileobj=gzip_buffer) 60 | gzip_file.write(response.data) 61 | gzip_file.close() 62 | 63 | response.data = gzip_buffer.getvalue() 64 | response.headers['Content-Encoding'] = 'gzip' 65 | response.headers['Vary'] = 'Accept-Encoding' 66 | response.headers['Content-Length'] = len(response.data) 67 | 68 | return response 69 | 70 | return f(*args, **kwargs) 71 | 72 | return view_func 73 | 74 | def uid(): return str(uuid.uuid4()) 75 | 76 | def tconv(seconds): 77 | minutes, seconds = divmod(seconds, 60) 78 | hours, minutes = divmod(minutes, 60) 79 | days, hours = divmod(hours, 24) 80 | weeks, days = divmod(days, 7) 81 | s="" 82 | if weeks: 83 | if weeks == 1: 84 | s+= "1 week, " 85 | else: 86 | s+= "%i weeks, " % (weeks) 87 | if days: 88 | if days == 1: 89 | s+= "1 day, " 90 | else: 91 | s+= "%i days, " % (days) 92 | if hours: 93 | if hours == 1: 94 | s+= "1 hour, " 95 | else: 96 | s+= "%i hours, " % (hours) 97 | if minutes: 98 | if minutes == 1: 99 | s+= "1 minute" 100 | else: 101 | s+= "%i minutes" % (minutes) 102 | if seconds: 103 | if len(s) > 0: 104 | if seconds == 1: 105 | s+= " and %i second" % (seconds) 106 | else: 107 | s+= " and %i seconds" % (seconds) 108 | else: 109 | if seconds == 1: 110 | s+= "1 second" 111 | else: 112 | s+= "%i seconds" % (seconds) 113 | return s 114 | 115 | def spaceparse(string): 116 | """ 117 | Return strings surrounded in quotes as a list, or dict if they're key="value".
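For example (illustrative values):

    spaceparse('name="HN" url="http://news.ycombinator.com/rss"')
    => {'name': 'HN', 'url': 'http://news.ycombinator.com/rss'}

    spaceparse('"free speech" "free beer"')
    => ['free speech', 'free beer']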
118 | """ 119 | results = [] 120 | quotes = string.count('"') 121 | quoted = quotes / 2 122 | keyvalue = False 123 | 124 | # Return an empty resultset if there are an uneven number of quotation marks 125 | if quotes % 2 != 0: 126 | return results 127 | 128 | # for every quoted phrase determine if it's an assignment and include the variable name 129 | # disregard it from the string we're working with and continue onto the next quoted part 130 | for phrase in range(0,quoted+1): 131 | if not string: break 132 | start = string.find('"') 133 | end = string.find('"', start+1) 134 | 135 | if start > 0 and string[start-1] == '=': 136 | keyvalue = True 137 | for i in range(start,-1,-1): 138 | if string[i] == ' ' or i == 0: 139 | results.append(string[i:end]) 140 | break 141 | else: 142 | results.append(string[start+1:end]) 143 | string = string[end+1:] 144 | if keyvalue: 145 | res = {} 146 | for item in results: 147 | k,v = item.split('=') 148 | if k.startswith(' '): 149 | k=k[1:] 150 | if v.startswith('"'): 151 | v=v[1:] 152 | res[k]=v 153 | return res 154 | return results 155 | 156 | def update_url(url, params): 157 | url_parts = list(urlparse.urlparse(request.url)) 158 | query = dict(urlparse.parse_qsl(url_parts[4])) 159 | query.update(params) 160 | url_parts[4] = urllib.urlencode(query) 161 | return urlparse.urlunparse(url_parts) 162 | 163 | def make_response(url, query, jsonify=True): 164 | """ 165 | Take a paginated SQLAlchemy query and return 166 | a response that's more easily reasoned about 167 | by other programs. 168 | """ 169 | response = {} 170 | if jsonify: 171 | response['data'] = [i.jsonify() for i in query.items] 172 | 173 | response['links'] = {} 174 | response['links']['self'] = url 175 | if query.has_next: 176 | response['links']['next'] = update_url(url, {"page": str(query.next_num)}) 177 | return response 178 | -------------------------------------------------------------------------------- /emissary/models.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | """ 3 | MIT License. 4 | Luke Brooks 2015 5 | Database layout for Emissary. 6 | """ 7 | import time 8 | import snappy 9 | from hashlib import sha256 10 | from emissary import db, app 11 | from multiprocessing import Queue 12 | from emissary.controllers.utils import uid 13 | 14 | class APIKey(db.Model): 15 | """ 16 | An Emissary API Key. 17 | Reader keys cannot PUT, POST or DELETE. 18 | """ 19 | __tablename__ = 'api_keys' 20 | id = db.Column(db.Integer, primary_key=True) 21 | parent_id = db.Column(db.Integer(), db.ForeignKey("api_keys.id")) 22 | name = db.Column(db.String(80)) 23 | key = db.Column(db.String(120)) 24 | active = db.Column(db.Boolean()) 25 | reader = db.Column(db.Boolean(), default=False) 26 | created = db.Column(db.DateTime(timezone=True), default=db.func.now()) 27 | parent = db.relationship("APIKey", backref="readers", remote_side=[id]) 28 | feedgroups = db.relationship("FeedGroup", backref="key") 29 | feeds = db.relationship("Feed", backref="key") 30 | articles = db.relationship("Article", backref="key") 31 | events = db.relationship("Event", backref="key") 32 | 33 | def generate_key_str(self): 34 | """ 35 | Returns a SHA256 of the time as an API Key. 
36 | """ 37 | return sha256(time.asctime() + str(time.time())).hexdigest() 38 | 39 | def __repr__(self): 40 | if not self.name: 41 | return "" 42 | return '' % self.name 43 | 44 | def jsonify(self, feedgroups=False, with_key_str=False): 45 | response = {} 46 | response['name'] = self.name 47 | if with_key_str: 48 | response['apikey'] = self.key 49 | if feedgroups: 50 | response['feedgroups'] = [group.jsonify() for group in self.feedgroups] 51 | response['active'] = self.active 52 | response['reader'] = self.reader 53 | if self.reader: 54 | response['parent'] = self.parent.name 55 | return response 56 | 57 | class FeedGroup(db.Model): 58 | __tablename__ = "feed_groups" 59 | id = db.Column(db.Integer(), primary_key=True) 60 | key_id = db.Column(db.Integer(), db.ForeignKey("api_keys.id")) 61 | uid = db.Column(db.String(36), default=uid()) 62 | name = db.Column(db.String(80)) 63 | feeds = db.relationship('Feed', backref="group") 64 | created = db.Column(db.DateTime(timezone=True), default=db.func.now()) 65 | active = db.Column(db.Boolean(), default=True) 66 | 67 | def __repr__(self): 68 | if self.name: 69 | return '' % (self.name, len(self.feeds)) 70 | return "" 71 | 72 | def jsonify(self): 73 | response = {} 74 | if self.created: 75 | response['name'] = self.name 76 | response['uid'] = self.uid 77 | response['created'] = time.mktime(self.created.timetuple()) 78 | response['active'] = self.active 79 | response['feeds'] = [feed.jsonify() for feed in self.feeds] 80 | return response 81 | 82 | class Feed(db.Model): 83 | __tablename__ = "feeds" 84 | id = db.Column(db.Integer(), primary_key=True) 85 | key_id = db.Column(db.Integer(), db.ForeignKey("api_keys.id")) 86 | group_id = db.Column(db.Integer(), db.ForeignKey("feed_groups.id")) 87 | uid = db.Column(db.String(36), default=uid()) 88 | name = db.Column(db.String(100)) 89 | url = db.Column(db.String(150)) 90 | schedule = db.Column(db.String(80)) 91 | active = db.Column(db.Boolean(), default=True) 92 | created = db.Column(db.DateTime(timezone=True), default=db.func.now()) 93 | articles = db.relationship('Article', backref="feed") 94 | 95 | def __repr__(self): 96 | if self.name: 97 | return '' % (self.name, len(self.articles)) 98 | return "" 99 | 100 | def is_running(self): 101 | """ 102 | Ask the feedmanager what's happening. 
103 | """ 104 | if not app.inbox: 105 | return None 106 | 107 | response_queue = app.queues[-1] 108 | qid = hex(id(response_queue)) 109 | app.inbox.put([qid, "check", self]) 110 | 111 | # Wait somewhere around 500ms max for a response 112 | then = time.time() 113 | while response_queue.empty(): 114 | now = time.time() 115 | if (now - then) >= 0.5: 116 | return None 117 | 118 | return response_queue.get() 119 | 120 | def jsonify(self, articles=False): 121 | response = {} 122 | if self.created: 123 | response['name'] = self.name 124 | response['uid'] = self.uid 125 | response['url'] = self.url 126 | response['created'] = time.mktime(self.created.timetuple()) 127 | response['schedule'] = self.schedule 128 | response['active'] = self.active 129 | response['article_count'] = len(self.articles) 130 | response['running'] = self.is_running() 131 | if self.group: 132 | response['group'] = self.group.name 133 | else: 134 | response['group'] = None 135 | return response 136 | 137 | 138 | class Article(db.Model): 139 | __tablename__ = "articles" 140 | id = db.Column(db.Integer(), primary_key=True) 141 | key_id = db.Column(db.Integer(), db.ForeignKey("api_keys.id")) 142 | uid = db.Column(db.String(36)) 143 | feed_id = db.Column(db.Integer(), db.ForeignKey("feeds.id")) 144 | title = db.Column(db.String(80)) 145 | url = db.Column(db.String(400)) 146 | content = db.Column(db.String(2000)) 147 | ccontent = db.Column(db.LargeBinary()) 148 | summary = db.Column(db.String(800)) 149 | created = db.Column(db.DateTime(timezone=True), default=db.func.now()) 150 | compressed = db.Column(db.Boolean(), default=False) 151 | 152 | def text(self): 153 | if self.content: 154 | return self.content.decode("utf-8", "ignore") 155 | if self.ccontent: 156 | return snappy.decompress(self.ccontent).decode("utf-8", "ignore") 157 | return "" 158 | 159 | def __repr__(self): 160 | if self.content or self.ccontent: 161 | return '
<Article: %s>' % self.title.encode("utf-8", "ignore") 162 | if self.url and self.title: 163 | return '<Article: %s (no content)>' % self.title.encode("utf-8", "ignore") 164 | return "<Article>
" 165 | 166 | def jsonify(self, summary=False, content=False): 167 | response = {} 168 | if self.title: 169 | response['title'] = self.title.encode("utf-8", "ignore") 170 | response['url'] = self.url.encode("utf-8", "ignore") 171 | response['uid'] = self.uid 172 | response['created'] = time.mktime(self.created.timetuple()) 173 | if self.feed: 174 | response['feed'] = self.feed.name 175 | if content: 176 | response['compressed'] = self.compressed 177 | if self.ccontent: 178 | response['content'] = snappy.decompress(self.ccontent) 179 | else: 180 | response['content'] = self.content 181 | if not content: 182 | if self.content or self.ccontent: 183 | response['content_available'] = True 184 | else: 185 | response['content_available'] = False 186 | if summary and self.summary: 187 | response['summary'] = self.summary 188 | return response 189 | 190 | class Event(db.Model): 191 | __tablename__ = "events" 192 | id = db.Column(db.Integer(), primary_key=True) 193 | key_id = db.Column(db.Integer(), db.ForeignKey("api_keys.id")) 194 | created = db.Column(db.DateTime(timezone=True), default=db.func.now()) 195 | feed_id = db.Column(db.Integer(), db.ForeignKey("feeds.id")) 196 | success = db.Column(db.Boolean()) 197 | message = db.Column(db.String(200)) 198 | 199 | def __repr__(self): 200 | return "" 201 | 202 | def jsonify(self): 203 | return {} 204 | -------------------------------------------------------------------------------- /emissary/repl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import cmd 4 | import json 5 | import time 6 | import errno 7 | import _curses 8 | import optparse 9 | import textwrap 10 | from emissary import app 11 | from emissary.client import Client 12 | from emissary.models import APIKey 13 | from subprocess import Popen, PIPE 14 | from emissary.controllers.utils import tconv, spaceparse 15 | from emissary.controllers.tui import window 16 | 17 | try: 18 | from pygments import highlight 19 | from pygments.lexers import JsonLexer 20 | from pygments.styles import get_style_by_name, STYLE_MAP 21 | from pygments.formatters.terminal256 import Terminal256Formatter 22 | except ImportError: highlight = False 23 | 24 | class repl(cmd.Cmd): 25 | 26 | prompt = "> " 27 | intro = "Emissary %s\nPsybernetics %i\n" % (app.version, time.gmtime()[0]) 28 | ruler = '-' 29 | width = 80 30 | 31 | 32 | def parse_args(self, args): 33 | body = {} 34 | parsed = spaceparse(args) 35 | args = args.split() 36 | for i in args: 37 | try: 38 | x=i.split('=') 39 | if type(parsed) == dict and not x[0] in parsed: 40 | parsed[x[0]] = x[1] 41 | else: 42 | body[x[0]] = x[1] 43 | except: continue 44 | if type(parsed) == dict: body = parsed 45 | return body 46 | 47 | def formatted_prompt(self): 48 | """ 49 | Here we format the first return value of /v1/articles/count 50 | into something that adds commas to triple digit (etc) values. 51 | """ 52 | try: 53 | return "({:,}) > ".format( 54 | self.c.get("articles/count")[0] 55 | ) 56 | except: 57 | return "no connection> " 58 | 59 | def do_setkey(self,key): 60 | "Sets the API key to transmit requests with." 61 | if key: 62 | self.c.key = key 63 | print 'Changed active API key to "%s"' % key 64 | else: 65 | print "Usage: setkey " 66 | 67 | def do_use(self,key): 68 | "Alias of setkey." 69 | self.do_setkey(key) 70 | 71 | def do_getkey(self,line): 72 | "Displays the current API key." 
73 | print self.c.key 74 | 75 | def do_get(self,line): 76 | """ 77 | Sends GET requests 78 | EG: get articles 79 | get feeds 80 | get feedgroups 81 | """ 82 | response = self.c._send_request(line) 83 | self.display(response) 84 | 85 | def do_put(self,line): 86 | """ 87 | Creates a new feed or feed group. 88 | EG: put feedgroups name=HN 89 | """ 90 | if not ' ' in line: 91 | print "Need data to transmit." 92 | else: 93 | line, body = line.split(' ',1) 94 | body = self.parse_args(body) 95 | response = self.c._send_request(line, 'PUT', body) 96 | self.display(response) 97 | 98 | 99 | def do_post(self,line): 100 | """ 101 | Modifies an existing feed or feed group. 102 | EG: post feeds/SomeFeed schedule="20 3 2! * *" 103 | """ 104 | 105 | if not ' ' in line: 106 | print "Need data to transmit." 107 | else: 108 | line, body = line.split(' ',1) 109 | body = self.parse_args(body) 110 | response = self.c._send_request(line, 'POST', body) 111 | self.display(response) 112 | 113 | def do_exit(self,line): 114 | try: 115 | _curses.endwin() 116 | except _curses.error: 117 | pass 118 | finally: 119 | raise SystemExit 120 | 121 | def do_read(self,line): 122 | """ 123 | Usage: read 124 | Pipes article content into the system pager. 125 | 126 | Text column width can be configured with the width command. 127 | """ 128 | then = time.time() 129 | response = self.c._send_request("articles/" + line) 130 | if response[1] != 200: 131 | print response[1] 132 | return 133 | 134 | data = response[0] 135 | 136 | if not 'content' in data: 137 | print None 138 | else: 139 | 140 | p = Popen(['less', '-P', data['title']], stdin=PIPE) 141 | 142 | try: 143 | duration = tconv(int(then) - int(data['created'])) 144 | p.stdin.write('%s\n(%i paragraphs, fetched %s ago)\n%s\n\n' % \ 145 | (data['title'].encode("utf-8", "ignore"), 146 | len(data['content'].encode("utf-8","ignore").split("\n"))/2+1, 147 | duration, 148 | data['url'].encode("utf-8","ignore"))) 149 | 150 | content = data['content'].encode("utf-8", "ignore") 151 | # Get TTY width and wrap the text 152 | if self.width == "auto": 153 | s = _curses.initscr() 154 | width = s.getmaxyx()[1] 155 | _curses.endwin() 156 | 157 | else: 158 | width = self.width 159 | 160 | content = '\n'.join( 161 | textwrap.wrap(content, width, break_long_words=False, replace_whitespace=False) 162 | ) 163 | p.stdin.write(content) 164 | 165 | except IOError as e: 166 | if e.errno == errno.EPIPE or e.errno == errno.EINVAL: 167 | sys.stderr.write("Error writing to pipe.\n") 168 | else: 169 | raise 170 | 171 | p.stdin.close() 172 | p.wait() 173 | now = time.time() 174 | duration = tconv(now-then) 175 | # print "\n%s" % duration 176 | 177 | def do_delete(self,line): 178 | """ 179 | Sends a DELETE request. 180 | EG: delete feeds/somefeed 181 | """ 182 | if ' ' in line: 183 | line, body = line.split(' ',1) 184 | body = self.parse_args(body) 185 | else: body = '' 186 | response = self.c._send_request(line, 'DELETE', body) 187 | self.display(response) 188 | 189 | def do_EOF(self,line): 190 | print "^D", 191 | return True 192 | 193 | def postcmd(self, stop, line): 194 | self.prompt = self.formatted_prompt() 195 | return stop 196 | 197 | def emptyline(self): 198 | pass 199 | 200 | def postloop(self): 201 | print 202 | 203 | def do_width(self, line): 204 | """ 205 | Set the text width for the read command. 206 | Acceptable values are an integer amount of characters or "auto". 
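EG: width 80
    width auto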
207 | """ 208 | if line == "auto": 209 | self.width = "auto" 210 | elif line == "": 211 | print "The current width is set to %s" % str(self.width) 212 | else: 213 | try: 214 | self.width = int(line) 215 | except: 216 | print "width must be an integer." 217 | 218 | def do_search(self, line): 219 | self.do_get("articles/search/" + line) 220 | 221 | def do_style(self, style): 222 | """ 223 | Usage: style 224 | Lists the available themes if no 225 | name is supplied, or sets the theme to use. 226 | """ 227 | if not self.highlight: 228 | print "For syntax highlighting you will need to install the Pygments package." 229 | print "sudo pip install pygments" 230 | return 231 | if style: 232 | self.style = style 233 | print 'Changed style to "%s"' % style 234 | else: 235 | print ', '.join(self.AVAILABLE_STYLES) 236 | print 'Currently using "%s"' % self.style 237 | 238 | def display(self, response): 239 | if self.highlight: 240 | print response[1] 241 | print highlight(json.dumps(response[0],indent=4), JsonLexer(), Terminal256Formatter(style=self.style)) 242 | else: self.c.p(response) 243 | 244 | def reqwrap(func): 245 | def wrapper(*args, **kwargs): 246 | try: return func(*args, **kwargs) 247 | except: return ({'error':'Connection refused.'}, 000) 248 | return wrapper 249 | 250 | 251 | if __name__ == "__main__": 252 | parser = optparse.OptionParser(prog="python -m emissary.repl") 253 | parser.add_option("--host", dest="host", action="store", default='localhost:6362/v1/') 254 | parser.add_option("--ncurses", dest="ncurses", action="store_true", default=False) 255 | (options,args) = parser.parse_args() 256 | 257 | if options.ncurses: 258 | r = window 259 | else: 260 | r = repl() 261 | 262 | r.c = Client('','https://%s' % options.host, verify=False) 263 | 264 | r.c.key = "" 265 | 266 | try: 267 | k = APIKey.query.first() 268 | except Exception, e: 269 | print "Encountered an error: " + e.message 270 | print "This either means there's no URI exported as EMISSARY_DATABASE or you've exported a URI" 271 | print "but haven't given Emissary a first run in order to write the schema and a primary API key." 272 | raise SystemExit 273 | 274 | if k: r.c.key = k.key 275 | r.c.verify_https = False 276 | 277 | if not options.ncurses: 278 | r.highlight = highlight 279 | r.prompt = r.formatted_prompt() 280 | if highlight: 281 | r.AVAILABLE_STYLES = set(STYLE_MAP.keys()) 282 | if 'tango' in r.AVAILABLE_STYLES: r.style = 'tango' 283 | else: 284 | for s in r.AVAILABLE_STYLES: break 285 | r.style = s 286 | r.c._send_request = reqwrap(r.c._send_request) 287 | 288 | try: 289 | if options.ncurses: 290 | window.start() 291 | else: 292 | r.cmdloop() 293 | except KeyboardInterrupt: 294 | print "^C" 295 | raise SystemExit 296 | -------------------------------------------------------------------------------- /emissary/resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LukeB42/Emissary/31629a8baedc91a9b60c551a01b2b45372b9a8c7/emissary/resources/__init__.py -------------------------------------------------------------------------------- /emissary/resources/api_key.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # This module determines the behavior of API Keys within the system. 3 | # You may also want to check the definition of API keys in models.py. 
4 | import re 5 | from flask import request 6 | from sqlalchemy import and_ 7 | from emissary import app, db 8 | from emissary.models import * 9 | from flask.ext import restful 10 | from flask.ext.restful import reqparse, abort 11 | from emissary.controllers.utils import cors, gzipped 12 | 13 | def auth(forbid_reader_keys=False): 14 | """ 15 | Here we determine that inactive keys are invalid 16 | and that reader keys are their parent unless forbidden. 17 | """ 18 | if 'Authorization' in request.headers: 19 | key_str = request.headers['Authorization'].replace('Basic ', '') 20 | key = APIKey.query.filter(APIKey.key == key_str).first() 21 | if key and key.active: 22 | if key.reader: 23 | if not forbid_reader_keys: 24 | return key.parent 25 | abort(401, message="Forbidden to reader keys.") 26 | return key 27 | abort(401, message="Invalid API Key.") 28 | 29 | class KeyCollection(restful.Resource): 30 | 31 | @cors 32 | @gzipped 33 | def get(self): 34 | key = auth() 35 | response = key.jsonify(feedgroups=False) 36 | 37 | if key.name == app.config['MASTER_KEY_NAME'] or key.systemwide: 38 | response['system'] = {} 39 | 40 | if key.name == app.config['MASTER_KEY_NAME']: 41 | keys = [] 42 | for i in APIKey.query.all(): keys.append(i.name) 43 | response['system']['keys'] = keys 44 | response['system']['permit_new'] = app.config['PERMIT_NEW'] 45 | 46 | return [response] 47 | 48 | @cors 49 | @gzipped 50 | def put(self): 51 | """ 52 | This method creates keys under the specified name, 53 | presuming config['PERMIT_NEW'] is enabled or the master key is in use. 54 | 55 | Reader keys (keys that can only perform GET requests) are created by setting 56 | the "reader" parameter to a value in the body of the request. 57 | They are automatically associated with the requesting key. 58 | """ 59 | key = None 60 | parser = reqparse.RequestParser() 61 | parser.add_argument("name",type=str, help="Name associated with the key", required=True) 62 | parser.add_argument("reader",type=bool, help="Creates a reader key", default=False) 63 | args = parser.parse_args() 64 | 65 | if 'Authorization' in request.headers: 66 | key_str = request.headers['Authorization'].replace('Basic ', '') 67 | key = APIKey.query.filter(APIKey.key == key_str).first() 68 | if key.reader: 69 | abort(401, message="Reader keys cannot create API keys.") 70 | 71 | # Create a reader key if this request has been made with an existing key 72 | if key and args.name and args.reader: 73 | new_key = APIKey(name=args.name, active=True, reader=True) 74 | new_key.key = new_key.generate_key_str() 75 | key.readers.append(new_key) 76 | db.session.add(key) 77 | db.session.add(new_key) 78 | db.session.commit() 79 | 80 | return new_key.jsonify(with_key_str=True), 201 81 | 82 | if (key and key.name == app.config['MASTER_KEY_NAME']) or app.config['PERMIT_NEW']: 83 | # Permit only simple names (character limit, alphanumeric) 84 | if re.match("^$|\s+[a-zA-Z0-9_]+$",args.name) or len(args.name) > 60: 85 | abort(422, message="Invalid key name. 
Must contain alphanumeric characters.") 86 | # Determine if already exists 87 | key = APIKey.query.filter(APIKey.name == args.name).first() 88 | 89 | if key: abort(403, message="A key already exists with this name.") 90 | 91 | key = APIKey(name=args.name) 92 | key.key = key.generate_key_str() 93 | key.active = True 94 | db.session.add(key) 95 | db.session.commit() 96 | 97 | return key.jsonify(with_key_str=True), 201 98 | 99 | abort(403, message="This server isn't currently generating new keys.") 100 | 101 | @cors 102 | @gzipped 103 | def post(self): 104 | "This method is for updating existing API keys via the master key." 105 | 106 | key = auth(forbid_reader_keys=True) 107 | 108 | parser = reqparse.RequestParser() 109 | parser.add_argument("key",type=str, help="API Key") 110 | parser.add_argument("name",type=str, help="Name associated with the key") 111 | parser.add_argument("permit_new", type=bool, help="Determines whether new API keys can be created.") 112 | parser.add_argument("active", type=bool, help="Determines whether a key is active or not.", default=None) 113 | args = parser.parse_args() 114 | 115 | if key.name != app.config['MASTER_KEY_NAME']: abort(403) 116 | 117 | response={} 118 | subject = None 119 | 120 | if args.key and args.name: 121 | subject = APIKey.query.filter(APIKey.key == args.key).first() 122 | if APIKey.query.filter(APIKey.name == args.name).first(): 123 | return {'message':"A key already exists with this name."}, 304 124 | subject.name = args.name 125 | elif args.name and not args.key: 126 | subject = APIKey.query.filter(APIKey.name == args.name).first() 127 | elif args.key and not args.name: 128 | subject = APIKey.query.filter(APIKey.key == args.key).first() 129 | 130 | if not subject: abort(404) 131 | 132 | if subject.name == app.config['MASTER_KEY_NAME']: abort(403) 133 | if args.active or args.active == False: 134 | subject.active = args.active 135 | 136 | response['key'] = subject.jsonify(with_key_str=True) 137 | db.session.add(subject) 138 | 139 | if (args.permit_new or args.permit_new == False) and key.name == app.config['MASTER_KEY_NAME']: 140 | app.config['PERMIT_NEW'] = args.permit_new 141 | response['system'] = {} 142 | response['system']['permit_new'] = app.config['PERMIT_NEW'] 143 | 144 | db.session.commit() 145 | return response 146 | 147 | @cors 148 | @gzipped 149 | def delete(self): 150 | # http://docs.sqlalchemy.org/en/rel_0_9/orm/tutorial.html#configuring-delete-delete-orphan-cascade 151 | key = auth(forbid_reader_keys=True) 152 | 153 | parser = reqparse.RequestParser() 154 | parser.add_argument("key",type=str, help="API Key") 155 | args = parser.parse_args() 156 | 157 | target = APIKey.query.filter(APIKey.key == args.key).first() 158 | if not target: abort(404, message="Unrecognized key.") 159 | 160 | if args.key != key.key and key.name != app.config['MASTER_KEY_NAME']: 161 | abort(403, message="You do not have permission to remove this key.") 162 | if key.name == app.config['MASTER_KEY_NAME'] and args.key == key.key: 163 | abort(403, message="You are attempting to delete the master key.") 164 | 165 | for fg in target.feedgroups: db.session.delete(fg) 166 | for f in target.feeds: db.session.delete(f) 167 | for a in target.articles: db.session.delete(a) 168 | 169 | db.session.delete(target) 170 | db.session.commit() 171 | return {}, 204 172 | 173 | class KeyResource(restful.Resource): 174 | 175 | @cors 176 | @gzipped 177 | def get(self, name): 178 | """ 179 | Permit the administrative key to review another key by name. 
180 | """ 181 | key = auth(forbid_reader_keys=True) 182 | if key.name != app.config['MASTER_KEY_NAME'] and name != key.name: 183 | abort(403) 184 | 185 | target = APIKey.query.filter_by(name=name).first() 186 | if target: 187 | return target.jsonify(feedgroups=True, with_key_str=True) 188 | 189 | abort(404, message="Unrecognised key.") 190 | -------------------------------------------------------------------------------- /emissary/resources/articles.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # This file determines how articles are accessed. 3 | # You may also want to examine the Article class in emissary/models.py 4 | from emissary import db 5 | from flask import request 6 | from flask.ext import restful 7 | from sqlalchemy import desc, and_ 8 | from emissary.models import Article 9 | from emissary.resources.api_key import auth 10 | from emissary.controllers.fetch import fetch_feedless_article 11 | from emissary.controllers.utils import make_response, gzipped, cors 12 | 13 | class ArticleCollection(restful.Resource): 14 | 15 | @cors 16 | def get(self): 17 | """ 18 | Review all articles associated with this key. 19 | """ 20 | key = auth() 21 | 22 | parser = restful.reqparse.RequestParser() 23 | parser.add_argument("page", type=int, default=1) 24 | parser.add_argument("per_page", type=int, default=10) 25 | parser.add_argument("content", type=bool, default=None) 26 | args = parser.parse_args() 27 | 28 | # Construct a query for Articles ordered by descending creation date and paginated. 29 | if args.content == True: 30 | query = Article.query.filter(and_(Article.key == key, Article.content != None))\ 31 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 32 | elif args.content == False: 33 | query = Article.query.filter(and_(Article.key == key, Article.content == None))\ 34 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 35 | else: 36 | query = Article.query.filter(Article.key == key)\ 37 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 38 | 39 | # Attach links to help consuming applications 40 | response = make_response(request.url, query) 41 | return response 42 | 43 | @cors 44 | def put(self): 45 | """ 46 | Fetch an article without an associated feed. 47 | """ 48 | key = auth() 49 | 50 | parser = restful.reqparse.RequestParser() 51 | parser.add_argument("url", type=str, required=True) 52 | args = parser.parse_args() 53 | 54 | try: 55 | article = fetch_feedless_article(key, args.url) 56 | except Exception, e: 57 | return {"Error": e.message} 58 | 59 | if not article: 60 | return {"Error": "This URL has already been stored."}, 304 61 | 62 | return article.jsonify(), 201 63 | 64 | class ArticleSearch(restful.Resource): 65 | 66 | @cors 67 | def get(self, terms): 68 | """ 69 | The /v1/articles/search/ endpoint. 
70 | """ 71 | key = auth() 72 | 73 | parser = restful.reqparse.RequestParser() 74 | parser.add_argument("page", type=int, help="", default=1) 75 | parser.add_argument("per_page", type=int, help="", default=10) 76 | parser.add_argument("content", type=bool, help="", default=None) 77 | args = parser.parse_args() 78 | 79 | if args.content == True: 80 | query = Article.query.filter( 81 | and_( 82 | Article.key == key, 83 | Article.content != None, 84 | Article.title.like("%" + terms + "%") 85 | ))\ 86 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 87 | 88 | response = make_response(request.url, query) 89 | 90 | # This method of manually pruning JSON documents because they 91 | # don't relate to items that have content can omit them from search 92 | # completely. They don't have content but they're showing up here in 93 | # content != None rather than content == None.. You could always just 94 | # comment out this next for loop 95 | for doc in response['data']: 96 | if not doc['content_available']: 97 | response['data'].remove(doc) 98 | return response 99 | 100 | elif args.content == False: 101 | query = Article.query.filter( 102 | and_( 103 | Article.key == key, 104 | Article.content == None, 105 | Article.title.like("%" + terms + "%") 106 | ))\ 107 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 108 | return make_response(request.url, query) 109 | 110 | query = Article.query.filter( 111 | and_(Article.key == key, Article.title.like("%" + terms + "%")))\ 112 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 113 | return make_response(request.url, query) 114 | 115 | class ArticleResource(restful.Resource): 116 | 117 | @cors 118 | def get(self, uid): 119 | """ 120 | Read an article. 121 | """ 122 | key = auth() 123 | 124 | article = Article.query.filter(and_(Article.key == key, Article.uid == uid)).first() 125 | if article: 126 | return article.jsonify(summary=True, content=True) 127 | 128 | restful.abort(404) 129 | 130 | @cors 131 | @gzipped 132 | def delete(self, uid): 133 | """ 134 | Delete an article. 135 | """ 136 | key = auth(forbid_reader_keys=True) 137 | 138 | article = Article.query.filter(and_(Article.key == key, Article.uid == uid)).first() 139 | if article: 140 | db.session.delete(article) 141 | db.session.commit() 142 | return {} 143 | 144 | restful.abort(404) 145 | 146 | class ArticleCount(restful.Resource): 147 | 148 | @cors 149 | def get(self): 150 | """ 151 | Return the amount of articles belonging to an API key. 152 | """ 153 | key = auth() 154 | return len(key.articles) 155 | 156 | -------------------------------------------------------------------------------- /emissary/resources/feedgroups.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # This file provides the HTTP endpoints for operating on groups of feeds. 3 | from emissary import app, db 4 | from flask import request 5 | from flask.ext import restful 6 | from sqlalchemy import and_, desc 7 | from emissary.resources.api_key import auth 8 | from emissary.models import FeedGroup, Feed, Article 9 | from emissary.controllers.cron import CronError, parse_timings 10 | from emissary.controllers.utils import cors, gzipped, make_response 11 | 12 | class FeedGroupCollection(restful.Resource): 13 | 14 | @cors 15 | @gzipped 16 | def get(self): 17 | """ 18 | Paginate an array of feed groups 19 | associated with the requesting key. 
20 | """ 21 | key = auth() 22 | 23 | parser = restful.reqparse.RequestParser() 24 | parser.add_argument("page", type=int, default=1) 25 | parser.add_argument("per_page", type=int, default=10) 26 | parser.add_argument("content", type=bool, default=None) 27 | args = parser.parse_args() 28 | 29 | query = FeedGroup.query.filter(FeedGroup.key == key)\ 30 | .order_by(desc(FeedGroup.created)).paginate(args.page, args.per_page) 31 | 32 | return make_response(request.url, query) 33 | 34 | @cors 35 | @gzipped 36 | def put(self): 37 | """ 38 | Create a new feed group, providing the name isn't already in use. 39 | """ 40 | key = auth(forbid_reader_keys=True) 41 | 42 | parser = restful.reqparse.RequestParser() 43 | parser.add_argument("name", type=str, required=True) 44 | parser.add_argument("active", type=bool, default=True, help="Feed is active", required=False) 45 | args = parser.parse_args() 46 | 47 | # Check for this name already existing in the groups on this key 48 | if [fg for fg in key.feedgroups if fg.name == args.name]: 49 | return {"message":"Feed group %s already exists." % args.name}, 304 50 | 51 | fg = FeedGroup(name=args.name, active=args.active) 52 | key.feedgroups.append(fg) 53 | db.session.add(fg) 54 | db.session.add(key) 55 | db.session.commit() 56 | 57 | return fg.jsonify(), 201 58 | 59 | class FeedGroupResource(restful.Resource): 60 | 61 | @cors 62 | @gzipped 63 | def get(self, groupname): 64 | """ 65 | Review a specific feed group. 66 | """ 67 | key = auth() 68 | 69 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 70 | if not fg: 71 | restful.abort(404) 72 | return fg.jsonify() 73 | 74 | @cors 75 | @gzipped 76 | def put(self, groupname): 77 | """ 78 | Create a new feed providing the name and url are unique. 79 | Feeds must be associated with a group. 80 | """ 81 | key = auth(forbid_reader_keys=True) 82 | 83 | parser = restful.reqparse.RequestParser() 84 | parser.add_argument("name", type=str, required=True) 85 | parser.add_argument("url", type=str, required=True) 86 | parser.add_argument("schedule", type=str, required=True) 87 | parser.add_argument("active", type=bool, default=True, help="Feed is active", required=False) 88 | args = parser.parse_args() 89 | 90 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 91 | if not fg: 92 | return {"message":"Unknown Feed Group %s" % groupname}, 304 93 | 94 | # Verify the schedule 95 | try: 96 | parse_timings(args.schedule) 97 | except CronError, err: 98 | return {"message": err.message}, 500 99 | 100 | # Check the URL isn't already scheduled on this key 101 | if [feed for feed in key.feeds if feed.url == args.url]: 102 | return {"message": "A feed on this key already exists with this url."}, 500 103 | 104 | # Check the name is unique to this feedgroup 105 | if [feed for feed in fg.feeds if feed.name == args.name]: 106 | return {"message": "A feed in this group already exists with this name."}, 500 107 | 108 | feed = Feed(name=args.name, url=args.url, schedule=args.schedule, active=args.active) 109 | 110 | # We generally don't want to have objects in this system that don't belong to API keys. 111 | fg.feeds.append(feed) 112 | key.feeds.append(feed) 113 | 114 | db.session.add(feed) 115 | db.session.add(fg) 116 | db.session.add(key) 117 | db.session.commit() 118 | 119 | feed = Feed.query.filter(and_(Feed.key == key, Feed.name == args.name)).first() 120 | if not feed: 121 | return {"message":"Error saving feed."}, 304 122 | 123 | # Schedule this feed. 
0 here is a response 124 | # queue ID (we're not waiting for a reply) 125 | app.inbox.put([0, "start", [key,feed.name]]) 126 | return feed.jsonify(), 201 127 | 128 | @cors 129 | @gzipped 130 | def post(self, groupname): 131 | "Rename a feedgroup or toggle active status" 132 | 133 | key = auth(forbid_reader_keys=True) 134 | 135 | parser = restful.reqparse.RequestParser() 136 | parser.add_argument("name", type=str, help="Rename a feed group",) 137 | parser.add_argument("active", type=bool, default=None) 138 | args = parser.parse_args() 139 | 140 | fg = FeedGroup.query.filter( 141 | and_(FeedGroup.key == key, FeedGroup.name == groupname) 142 | ).first() 143 | if not fg: 144 | restful.abort(404) 145 | 146 | if args.name: 147 | if FeedGroup.query.filter( 148 | and_(FeedGroup.key == key, FeedGroup.name == args.name) 149 | ).first(): 150 | return {"message":"A feed already exists with this name."}, 304 151 | fg.name = args.name 152 | 153 | if args.active or args.active == False: 154 | fg.active = args.active 155 | 156 | db.session.add(fg) 157 | db.session.commit() 158 | return fg.jsonify() 159 | 160 | @cors 161 | @gzipped 162 | def delete(self, groupname): 163 | key = auth(forbid_reader_keys=True) 164 | 165 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 166 | if not fg: 167 | restful.abort(404) 168 | count=0 169 | for feed in fg.feeds: 170 | for article in feed.articles: 171 | count += 1 172 | db.session.delete(article) 173 | db.session.delete(feed) 174 | db.session.delete(fg) 175 | db.session.commit() 176 | count = "{:,}".format(count) 177 | app.log('%s: Deleted feed group "%s". (%s articles)' % (key.name, fg.name, count)) 178 | 179 | return {} 180 | 181 | class FeedGroupArticles(restful.Resource): 182 | 183 | @cors 184 | def get(self, groupname): 185 | """ 186 | Retrieve articles by feedgroup. 187 | """ 188 | key = auth() 189 | 190 | # Summon the group or 404. 191 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 192 | if not fg: restful.abort(404) 193 | 194 | parser = restful.reqparse.RequestParser() 195 | parser.add_argument("page", type=int, default=1) 196 | parser.add_argument("per_page", type=int, default=10) 197 | parser.add_argument("content", type=bool, default=None) 198 | args = parser.parse_args() 199 | 200 | if args.content == True: 201 | 202 | query = Article.query.filter( 203 | and_(Article.feed.has(group=fg), Article.content != None))\ 204 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 205 | 206 | response = make_response(request.url, query) 207 | 208 | # for doc in response['data']: 209 | # if not doc['content_available']: 210 | # response['data'].remove(doc) 211 | # return response 212 | 213 | if args.content == False: 214 | query = Article.query.filter( 215 | and_(Article.feed.has(group=fg), Article.content == None))\ 216 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 217 | 218 | return make_response(request.url, query) 219 | 220 | query = Article.query.filter( 221 | Article.feed.has(group=fg))\ 222 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 223 | 224 | return make_response(request.url, query) 225 | 226 | class FeedGroupStart(restful.Resource): 227 | 228 | @cors 229 | def post(self, groupname): 230 | """ 231 | Start all feeds within a group. 
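Each feed is started by putting a [queue_id, "start", [key, feed_name]]
message in the feed manager's inbox; a queue_id of 0 means no reply is expected.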
232 | """ 233 | key = auth(forbid_reader_keys=True) 234 | 235 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 236 | if not fg: 237 | restful.abort(404) 238 | 239 | for feed in fg.feeds: 240 | app.inbox.put([0, "start", [key,feed.name]]) 241 | return {} 242 | 243 | class FeedGroupStop(restful.Resource): 244 | 245 | def post(self, groupname): 246 | key = auth(forbid_reader_keys=True) 247 | 248 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 249 | if not fg: 250 | restful.abort(404) 251 | 252 | for feed in fg.feeds: 253 | app.inbox.put([0, "stop", [key,feed.name]]) 254 | return {} 255 | 256 | class FeedGroupSearch(restful.Resource): 257 | 258 | def get(self, groupname, terms): 259 | """ 260 | Return articles on feeds in this group with our search terms in the title. 261 | """ 262 | key = auth() 263 | 264 | parser = restful.reqparse.RequestParser() 265 | parser.add_argument("page", type=int, default=1) 266 | parser.add_argument("per_page", type=int, default=10) 267 | # parser.add_argument("content", type=bool, default=None) 268 | args = parser.parse_args() 269 | 270 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 271 | if not fg: 272 | restful.abort(404) 273 | 274 | query = Article.query.filter( 275 | and_(Article.feed.has(group=fg), Article.title.like("%" + terms + "%")))\ 276 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 277 | return make_response(request.url, query) 278 | 279 | class FeedGroupCount(restful.Resource): 280 | 281 | def get(self, groupname): 282 | key = auth() 283 | 284 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 285 | if not fg: 286 | restful.abort(404) 287 | 288 | return sum(len(f.articles) for f in fg.feeds) 289 | -------------------------------------------------------------------------------- /emissary/resources/feeds.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # This file provides the HTTP endpoints for operating on individual feeds 3 | from emissary import app, db 4 | from flask import request 5 | from flask.ext import restful 6 | from sqlalchemy import desc, and_ 7 | from emissary.models import Feed, FeedGroup, Article 8 | from emissary.resources.api_key import auth 9 | from emissary.controllers.cron import CronError, parse_timings 10 | from emissary.controllers.utils import make_response, gzipped, cors 11 | 12 | class FeedResource(restful.Resource): 13 | 14 | @cors 15 | @gzipped 16 | def get(self, groupname, name): 17 | """ 18 | Review a feed. 19 | """ 20 | key = auth() 21 | 22 | feed = Feed.query.filter(and_(Feed.name == name, Feed.key == key)).first() 23 | if feed: 24 | return feed.jsonify() 25 | restful.abort(404) 26 | 27 | @cors 28 | @gzipped 29 | def post(self, groupname, name): 30 | """ 31 | Modify an existing feed. 
32 | """ 33 | key = auth(forbid_reader_keys=True) 34 | 35 | parser = restful.reqparse.RequestParser() 36 | parser.add_argument("name", type=str) 37 | parser.add_argument("group", type=str) 38 | parser.add_argument("url", type=str) 39 | parser.add_argument("schedule", type=str) 40 | parser.add_argument("active", type=bool, default=None, help="Feed is active") 41 | args = parser.parse_args() 42 | 43 | feed = Feed.query.filter(and_(Feed.key == key, Feed.name == name)).first() 44 | if not feed: 45 | restful.abort(404) 46 | 47 | if args.name: 48 | if Feed.query.filter(and_(Feed.key == key, Feed.name == args.name)).first(): 49 | return {"message":"A feed already exists with this name."}, 304 50 | feed.name = args.name 51 | 52 | if args.group: 53 | pass 54 | 55 | if args.active != None: 56 | feed.active = args.active 57 | 58 | if args.url: 59 | feed.url = args.url 60 | 61 | if args.schedule: 62 | try: 63 | parse_timings(args.schedule) 64 | except CronError, err: 65 | return {"message": err.message}, 500 66 | feed.schedule = args.schedule 67 | 68 | db.session.add(feed) 69 | db.session.commit() 70 | 71 | if args.url or args.schedule: 72 | app.inbox.put([0, "stop", [feed.key, feed.name]]) 73 | app.inbox.put([0, "start", [feed.key, feed.name]]) 74 | 75 | return feed.jsonify() 76 | 77 | @cors 78 | @gzipped 79 | def delete(self, groupname, name): 80 | """ 81 | Halt and delete a feed. 82 | Default to deleting its articles. 83 | """ 84 | key = auth(forbid_reader_keys=True) 85 | feed = Feed.query.filter(and_(Feed.key == key, Feed.name == name)).first() 86 | if not feed: 87 | restful.abort(404) 88 | app.inbox.put([0, "stop", [key, feed.name]]) 89 | app.log('%s: %s: Deleting feed "%s".' % (feed.key.name, feed.group.name, feed.name)) 90 | for a in feed.articles: 91 | db.session.delete(a) 92 | 93 | db.session.delete(feed) 94 | db.session.commit() 95 | 96 | return {} 97 | 98 | class FeedArticleCollection(restful.Resource): 99 | 100 | @cors 101 | def get(self, groupname, name): 102 | """ 103 | Review the articles for a specific feed on this key. 104 | """ 105 | key = auth() 106 | 107 | feed = Feed.query.filter(and_(Feed.name == name, Feed.key == key)).first() 108 | if not feed: abort(404) 109 | 110 | parser = restful.reqparse.RequestParser() 111 | parser.add_argument("page", type=int, default=1) 112 | parser.add_argument("per_page", type=int, default=10) 113 | parser.add_argument("content", type=bool, default=None) 114 | args = parser.parse_args() 115 | 116 | # Return a list of the JSONified Articles ordered by descending creation date and paginated. 117 | if args.content == True: 118 | query = Article.query.filter(and_(Article.key == key, Article.content != None, Article.feed == feed))\ 119 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 120 | 121 | return make_response(request.url, query) 122 | 123 | elif args.content == False: 124 | query = Article.query.filter(and_(Article.key == key, Article.content == None, Article.feed == feed))\ 125 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 126 | 127 | return make_response(request.url, query) 128 | 129 | query = Article.query.filter(and_(Article.key == key, Article.feed == feed))\ 130 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 131 | 132 | return make_response(request.url, query) 133 | 134 | class FeedSearch(restful.Resource): 135 | 136 | @cors 137 | def get(self, groupname, name, terms): 138 | """ 139 | Search for articles within a feed. 
140 | """ 141 | key = auth() 142 | 143 | parser = restful.reqparse.RequestParser() 144 | parser.add_argument("page", type=int, default=1) 145 | parser.add_argument("per_page", type=int, default=10) 146 | # parser.add_argument("content", type=bool, default=None) 147 | args = parser.parse_args() 148 | 149 | fg = FeedGroup.query.filter(and_(FeedGroup.key == key, FeedGroup.name == groupname)).first() 150 | if not fg: 151 | restful.abort(404) 152 | 153 | f = [f for f in fg.feeds if f.name == name] 154 | if not f: abort(404) 155 | 156 | f = f[0] 157 | 158 | query = Article.query.filter( 159 | and_(Article.feed == f, Article.title.like("%" + terms + "%")))\ 160 | .order_by(desc(Article.created)).paginate(args.page, args.per_page) 161 | 162 | return make_response(request.url, query) 163 | 164 | class FeedStartResource(restful.Resource): 165 | 166 | @cors 167 | def post(self, groupname, name): 168 | key = auth(forbid_reader_keys=True) 169 | 170 | feed = Feed.query.filter(and_(Feed.name == name, Feed.key == key)).first() 171 | if feed: 172 | app.inbox.put([0, "start", [key, feed.name]]) 173 | return feed.jsonify() 174 | restful.abort(404) 175 | 176 | class FeedStopResource(restful.Resource): 177 | 178 | @cors 179 | def post(self, groupname, name): 180 | key = auth(forbid_reader_keys=True) 181 | 182 | feed = Feed.query.filter(and_(Feed.name == name, Feed.key == key)).first() 183 | if feed: 184 | app.inbox.put([0, "stop", [key, feed.name]]) 185 | return feed.jsonify() 186 | restful.abort(404) 187 | 188 | -------------------------------------------------------------------------------- /emissary/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding: utf-8 _*_ 3 | 4 | # The reason we don't patch threading is because 5 | # our IPC queues rely on it for locking. We can't have them 6 | # be greenlets otherwise they will need the HTTPD to yeild 7 | # before data from the fetch process can be transmitted. 8 | from gevent import monkey; monkey.patch_all(thread=False) 9 | import gevent 10 | from gevent.queue import Queue 11 | from gevent.socket import socket 12 | from gevent.wsgi import WSGIServer 13 | 14 | import os 15 | import sys 16 | import pwd 17 | import time 18 | import signal 19 | import _socket 20 | import optparse 21 | from multiprocessing import Process 22 | 23 | from emissary import app, init, db 24 | from emissary.models import APIKey 25 | from emissary.controllers.log import Log 26 | from emissary.controllers.scripts import Scripts 27 | from emissary.controllers.load import parse_crontab 28 | from emissary.controllers.manager import FeedManager 29 | 30 | try: 31 | import setproctitle 32 | setproctitle.setproctitle("emissary") 33 | except ImportError: 34 | pass 35 | 36 | def Daemonise(pidfile): 37 | try: 38 | pid = os.fork() 39 | if pid > 0: 40 | sys.exit(0) # End parent 41 | except OSError, e: 42 | sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror)) 43 | sys.exit(-2) 44 | os.setsid() 45 | os.umask(0) 46 | try: 47 | pid = os.fork() 48 | if pid > 0: 49 | try: 50 | # TODO: Read the file first and determine if already running. 
 51 |                 f = file(pidfile, 'w')
 52 |                 f.write(str(pid))
 53 |                 f.close()
 54 |             except IOError, e:
 55 |                 # No logging module is imported here, so report on stderr.
 56 |                 sys.stderr.write(repr(e))
 57 |             sys.exit(0) # End parent
 58 |     except OSError, e:
 59 |         sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
 60 |         sys.exit(-2)
 61 |     for fd in (0, 1, 2): # Close stdio.
 62 |         try:
 63 |             os.close(fd)
 64 |         except OSError:
 65 |             pass
 66 | 
 67 | def export_crontab(filename):
 68 |     """
 69 |     Defined here to prevent circular imports.
 70 |     """
 71 |     crontab = ""
 72 |     fd = open(filename, "w")
 73 |     keys = [k for k in APIKey.query.all() if not k.reader]
 74 |     for key in keys:
 75 |         crontab += "apikey: %s\n\n" % key.key
 76 |         for feed in key.feeds:
 77 |             crontab += '%s "%s" "%s" %s\n' % (feed.url, feed.name, feed.group.name, feed.schedule)
 78 |         crontab += '\n\n'
 79 |     fd.write(crontab)
 80 |     fd.close()
 81 | 
 82 | if __name__ == "__main__":
 83 |     prog = "Emissary"
 84 |     description = "A microservice for archiving the news."
 85 |     epilog = "Psybernetics %s." % time.asctime().split()[-1]
 86 |     parser = optparse.OptionParser(prog=prog, version=app.version, description=description, epilog=epilog)
 87 | 
 88 |     parser.set_usage('python -m emissary.run [options]')
 89 |     parser.add_option("-c", "--crontab", dest="crontab", action="store", default=None, help="Crontab to parse")
 90 |     parser.add_option("--config", dest="config", action="store", default=None, help="(defaults to emissary.config)")
 91 |     parser.add_option("-a", "--address", dest="address", action="store", default='0.0.0.0', help="(defaults to 0.0.0.0)")
 92 |     parser.add_option("-p", "--port", dest="port", action="store", default='6362', help="(defaults to 6362)")
 93 |     parser.add_option("--key", dest="key", action="store", default=None, help="SSL key file")
 94 |     parser.add_option("--cert", dest="cert", action="store", default=None, help="SSL certificate")
 95 |     parser.add_option("--export", dest="export", action="store", default=False, help="Write the database out as a crontab to the given file")
 96 |     parser.add_option("--pidfile", dest="pidfile", action="store", default="emissary.pid", help="(defaults to ./emissary.pid)")
 97 |     parser.add_option("--logfile", dest="logfile", action="store", default="emissary.log", help="(defaults to ./emissary.log)")
 98 |     parser.add_option("--stop", dest="stop", action="store_true", default=False)
 99 |     parser.add_option("--debug", dest="debug", action="store_true", default=False, help="Log to stdout")
100 |     parser.add_option("-d", dest="daemonise", action="store_true", default=False, help="Run in the background")
101 |     parser.add_option("--run-as", dest="run_as", action="store", default=None, help="(defaults to the invoking user)")
102 |     parser.add_option("--scripts-dir", dest="scripts_dir", action="store", default="scripts", help="(defaults to ./scripts/)")
103 |     (options, args) = parser.parse_args()
104 | 
105 |     if options.config:
106 |         app.config.from_object(options.config)
107 | 
108 |     if options.crontab:
109 |         parse_crontab(options.crontab)
110 |         raise SystemExit
111 | 
112 |     app.debug = options.debug
113 | 
114 |     # Build the logger from command-line options.
115 |     log = Log("Emissary", log_file=options.logfile, log_stdout=not options.daemonise)
116 |     log.debug = options.debug
117 |     app.log = log
118 | 
119 |     log("Starting Emissary %s." % app.version)
120 | 
121 |     if options.stop:
122 |         pids = []
123 |         try:
124 |             f = file(options.pidfile, 'r')
125 |             pids = f.readline().split()
126 |             f.close()
127 |             os.unlink(options.pidfile)
128 |         except ValueError, e:
129 |             sys.stderr.write('Error in pid file "%s". Aborting\n' % options.pidfile)
130 |             sys.exit(-1)
131 |         except IOError, e:
132 |             pass
133 |         if pids:
134 |             for pid in pids:
135 |                 os.kill(int(pid), signal.SIGTERM)
136 |                 print "Killed process with ID %s." % pid
137 |         else:
138 |             sys.stderr.write('Emissary not running or no PID file found\n')
139 |         sys.exit(0)
140 | 
141 |     if options.export:
142 |         try:
143 |             export_crontab(options.export)
144 |             log('Crontab written to "%s".' % options.export)
145 |         except Exception, e:
146 |             log('Error writing crontab: %s' % e.message)
147 |         raise SystemExit
148 | 
149 | 
150 |     if not options.key and not options.cert:
151 |         print "SSL cert and key required. (--key and --cert)"
152 |         print "Keys and certs can be generated with:"
153 |         print "$ openssl genrsa 1024 > key"
154 |         print "$ openssl req -new -x509 -nodes -sha1 -days 365 -key key > cert"
155 |         raise SystemExit
156 | 
157 |     if '~' in options.cert: options.cert = os.path.expanduser(options.cert)
158 |     if '~' in options.key: options.key = os.path.expanduser(options.key)
159 | 
160 |     if not os.path.isfile(options.cert):
161 |         sys.exit("Certificate not found at %s" % options.cert)
162 | 
163 |     if not os.path.isfile(options.key):
164 |         sys.exit("Key not found at %s" % options.key)
165 | 
166 |     if os.getuid() == 0 and not options.run_as:
167 |         print "Running as root is not permitted.\nExecute this as a different user."
168 |         raise SystemExit
169 | 
170 |     sock = (options.address, int(options.port))
171 | 
172 |     if options.run_as: # Bind while still privileged, then drop to the target user.
173 |         sock = socket(family=_socket.AF_INET)
174 |         try:
175 |             sock.bind((options.address, int(options.port)))
176 |         except _socket.error:
177 |             ex = sys.exc_info()[1]
178 |             strerror = getattr(ex, 'strerror', None)
179 |             if strerror is not None:
180 |                 ex.strerror = strerror + ': ' + repr(options.address + ':' + options.port)
181 |             raise
182 |         sock.listen(50)
183 |         sock.setblocking(0)
184 |         uid = pwd.getpwnam(options.run_as)[2]
185 |         # setuid raises OSError if the invoking user lacks the privilege,
186 |         # which is fatal here, so let it propagate.
187 |         os.setuid(uid)
188 |         log("Now running as %s." % options.run_as)
189 | 
190 |     # Create the database schema and insert an administrative key
191 |     init()
192 | 
193 |     if options.daemonise: Daemonise(options.pidfile)
194 | 
195 |     # Load scripts
196 |     app.scripts = Scripts(options.scripts_dir)
197 |     app.scripts.reload()
198 | 
199 |     # Trap SIGHUP to reload scripts
200 |     signal.signal(signal.SIGHUP, app.scripts.reload)
201 | 
202 | 
203 |     # Initialise the feed manager with the logger, provide IPC access and load feeds.
204 |     fm = FeedManager(log)
205 |     fm.db = db
206 |     fm.app = app # Queue access
207 |     fm.load_feeds()
208 | 
209 |     # Start the REST interface
210 |     httpd = WSGIServer(sock, app, certfile=options.cert, keyfile=options.key)
211 |     httpd.loop.reinit()
212 |     httpd_process = Process(target=httpd.serve_forever)
213 |     log("Binding to %s:%s" % (options.address, options.port))
214 |     httpd_process.start()
215 | 
216 |     if options.daemonise:
217 |         f = file(options.pidfile, 'a')
218 |         f.write(' %i' % httpd_process.pid)
219 |         f.close()
220 | 
221 |     try:
222 |         fm.run()
223 |     except KeyboardInterrupt:
224 |         log("Stopping...")
225 |         httpd_process.terminate()
--------------------------------------------------------------------------------
/scripts/hello.py:
--------------------------------------------------------------------------------
 1 | # _*_ coding: utf-8 _*_
 2 | #
 3 | # This script creates a named pipe (if it doesn't exist)
 4 | # and writes the feed name, article title and url to it
 5 | # whenever an article is saved to the database.
 6 | #
 7 | # This is useful for composing systems that constantly read
 8 | # the FIFO and do things like emit the data to IRC channels.
 9 | #
10 | # You could, for instance, perform fuzzy pattern matching and be
11 | # notified when certain keywords are in the news.
12 | #
13 | # Transmission to a natural language processing/translation service
14 | # can also be done in a script or by reading a FIFO like the one here.
15 | #
16 | # Whether you use this system to profit, perform intelligence analysis
17 | # or inform your next vote is hopefully up to you!
18 | #
19 | # Luke Brooks, 2015
20 | # MIT License
21 | # Many big thanks to God, lord of universes.
22 | fifo = "/tmp/emissary.pipe"
23 | 
24 | import os, stat
25 | if not os.path.exists(fifo):
26 |     try:
27 |         os.mkfifo(fifo)
28 |     except Exception, e:
29 |         cache['app'].log("Error creating %s: %s" % (fifo, e.message))
30 | 
31 | # Emissary always executes scripts with an article and its feed in the namespace.
32 | 
33 | # There is also a dictionary named cache, containing the app object.
34 | # Through the app object you can access the logging interface and the feed manager.
35 | try:
36 |     # READER BEWARE: use non-blocking IO, or a write with no reader on the pipe will block the fetch and nothing will be stored.
37 |     fd = os.open(fifo, os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK)
38 |     os.write(fd, "%s: %s\n%s\n" % (feed.name, article.title, article.url))
39 |     os.close(fd)
40 |     del fd
41 | except Exception, e: # Usually because the kernel knows of no reader fd on the pipe.
42 |     pass
43 | 
44 | del os, stat, fifo
45 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # _*_ coding: utf-8 _*_
 3 | from setuptools import setup, find_packages
 4 | import os
 5 | import imp
 6 | 
 7 | def non_python_files(path):
 8 |     """ Return all non-python-file filenames in path """
 9 |     result = []
10 |     all_results = []
11 |     module_suffixes = [info[0] for info in imp.get_suffixes()]
12 |     ignore_dirs = ['cvs']
13 |     for item in os.listdir(path):
14 |         name = os.path.join(path, item)
15 |         if (
16 |             os.path.isfile(name) and
17 |             os.path.splitext(item)[1] not in module_suffixes
18 |         ):
19 |             result.append(name)
20 |         elif os.path.isdir(name) and item.lower() not in ignore_dirs:
21 |             all_results.extend(non_python_files(name))
22 |     if result:
23 |         all_results.append((path, result))
24 |     return all_results
25 | 
26 | data_files = (
27 |     # non_python_files('emissary') +
28 |     # non_python_files(os.path.join('Emissary', 'doc'))
29 | )
30 | 
31 | setup(name='Emissary',
32 |       version="2.1.1",
33 |       description='A microservice for indexing the plain text of articles and essays',
34 |       author='Luke Brooks',
35 |       author_email='luke@psybernetics.org.uk',
36 |       url='http://psybernetics.org.uk/emissary',
37 |       download_url='https://github.com/LukeB42/Emissary/tarball/2.0.0',
38 |       data_files=data_files,
39 |       packages=['emissary', 'emissary.resources', 'emissary.controllers'],
40 |       include_package_data=True,
41 |       install_requires=[
42 |           "setproctitle",
43 |           "goose-extractor",
44 |           "lxml",
45 |           "gevent",
46 |           "Flask-RESTful",
47 |           "Flask-SQLAlchemy",
48 |           "cssselect",
49 |           "BeautifulSoup",
50 |           "feedparser",
51 |           "python-snappy",
52 |           "requests",
53 |           "pygments",
54 |           "window",
55 |       ],
56 |       keywords=["text extraction", "document archival", "document retrieval"]
57 | )
58 | 
--------------------------------------------------------------------------------
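
The FIFO convention documented in scripts/hello.py invites small standalone consumers. Below is a minimal sketch of one, not part of the repository: the pipe path matches the one hello.py creates, while the filename and keyword list are purely illustrative.

# fifo_consumer.py: a hypothetical companion to scripts/hello.py.
# Reads /tmp/emissary.pipe line by line and flags watched keywords.
import os

fifo = "/tmp/emissary.pipe"
keywords = ["election", "security"] # Illustrative watchlist.

fd = os.open(fifo, os.O_RDONLY) # Blocks until hello.py opens the write end.
buf = ""
while True:
    data = os.read(fd, 4096)
    if not data: # Writer closed the pipe; reopen and wait for the next article.
        os.close(fd)
        fd = os.open(fifo, os.O_RDONLY)
        continue
    buf += data
    while "\n" in buf:
        line, buf = buf.split("\n", 1)
        if any(k in line.lower() for k in keywords):
            print "Matched: %s" % line

A reader like this also needs to be running for hello.py to deliver anything at all: the script's non-blocking write fails silently whenever the kernel knows of no reader on the pipe.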
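
Since, per the comments in hello.py, scripts execute with feed, article, and a cache dictionary in their namespace, the keyword-notification idea mentioned there could equally live in a second script along these lines. The filename, watchlist, and log message are hypothetical, relying only on the namespace contract hello.py documents.

# scripts/watchlist.py: a hypothetical script using only the namespace
# contract documented in scripts/hello.py (feed, article, cache).
keywords = ["encryption", "privacy"] # Illustrative watchlist.

title = article.title.lower()
if any(keyword in title for keyword in keywords):
    # Log a notice through the app object held in the cache dictionary.
    cache['app'].log('Watchlist hit in "%s": %s (%s)' % (feed.name, article.title, article.url))

# Tidy the shared namespace, as hello.py does.
del keywords, title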