├── README.md ├── __init__.py ├── .gitignore ├── routekit.py ├── dockit.py ├── LICENSE ├── cpustats.py ├── netstats.py ├── stringkit.py ├── procstats.py ├── pipekit.py ├── markup.py ├── filekit.py ├── favicon.py ├── core.py ├── datakit.py ├── logkit.py ├── imagekit.py ├── timekit.py ├── taskkit.py ├── decorators.py └── urlkit.py /README.md: -------------------------------------------------------------------------------- 1 | python-utils 2 | ============ 3 | 4 | A set of libraries I constantly re-use for a number of projects, so that I have a canonical, always-updated reference/sub-repo I can refer to. 5 | 6 | Some of these were designed to cope with older Python runtimes or to provide smaller, more manageable dependencies for common tasks. 7 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: Utility functions 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | 14 | log = logging.getLogger() 15 | 16 | # export commonly-used submodule symbols 17 | from utils.core import Struct, Singleton, get_config, tb 18 | from utils.filekit import path_for, locate 19 | from utils.timekit import time_since -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /routekit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2014, Rui Carmo 6 | Description: Bottle-specific utility functions 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | import json 14 | 15 | log = logging.getLogger() 16 | 17 | def inspect_routes(app): 18 | for route in app.routes: 19 | if 'mountpoint' in route.config: 20 | prefix = route.config['mountpoint']['prefix'] 21 | subapp = route.config['mountpoint']['target'] 22 | 23 | for prefixes, route in inspect_routes(subapp): 24 | yield [prefix] + prefixes, route 25 | else: 26 | yield [], route 27 | 28 | def dump_routes(app): 29 | for prefixes, route in inspect_routes(app): 30 | abs_prefix = '/'.join(part for p in prefixes for part in p.split('/')) 31 | log.warn("Prefix:'%s' Route:'%s' [%s] %s" % (abs_prefix, route.rule, route.method, route.callback)) 32 | 
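For reference, a minimal usage sketch for the two routekit helpers above (the app and route names are illustrative, and this assumes a Bottle version whose mount() records 'mountpoint' metadata in route.config, which is what inspect_routes() reads):

    from bottle import Bottle
    from routekit import dump_routes

    parent = Bottle()
    api = Bottle()

    @api.route('/status')
    def status():
        return 'ok'

    parent.mount('/api/', api)  # stores {'prefix': ..., 'target': api} in the route config
    dump_routes(parent)         # logs e.g. Prefix:'api' Route:'/status' [GET] <function status ...>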
-------------------------------------------------------------------------------- /dockit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: Docstring utility functions 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import logging 11 | import inspect 12 | from bottle import app 13 | 14 | log = logging.getLogger() 15 | 16 | def docs(): 17 | """Gather all docstrings related to routes and return them grouped by module""" 18 | 19 | routes = [] 20 | modules = {} 21 | for route in app().routes: 22 | doc = inspect.getdoc(route.callback) or inspect.getcomments(route.callback) 23 | if not doc: 24 | doc = '' 25 | module = inspect.getmodule(route.callback).__name__ 26 | item = { 27 | 'method': route.method, 28 | 'route': route.rule, 29 | 'function': route.callback.__name__, 30 | 'module': module, 31 | 'doc': inspect.cleandoc(doc) 32 | } 33 | if not module in modules: 34 | modules[module] = [] 35 | modules[module].append(item) 36 | return modules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Rui Carmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
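As a usage note for dockit.py above, a minimal sketch (the route and docstring are illustrative; it assumes routes were registered on Bottle's default app, which is what bottle.app() returns):

    from bottle import route
    import dockit

    @route('/hello')
    def hello():
        """Say hello"""
        return 'hello'

    print dockit.docs()
    # e.g. {'__main__': [{'method': 'GET', 'route': '/hello', 'function': 'hello',
    #                     'module': '__main__', 'doc': 'Say hello'}]}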
--------------------------------------------------------------------------------
/cpustats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Copyright (c) 2012, Rui Carmo
6 | Description: Utility functions for retrieving CPU statistics
7 | License: MIT (see LICENSE.md for details)
8 | """
9 | 
10 | import logging
11 | import time
12 | 
13 | log = logging.getLogger()
14 | 
15 | 
16 | def stats():
17 |     """Retrieves all CPU counters"""
18 |     cpu = open('/proc/stat','r').readlines()[0]
19 |     return map(float,cpu.split()[1:5])
20 | 
21 | 
22 | def usage(interval=0.1):
23 |     """Estimates overall CPU usage during a short time interval"""
24 |     t1 = stats()
25 |     time.sleep(interval)
26 |     t2 = stats()
27 |     delta = [t2[i] - t1[i] for i in range(len(t1))]
28 |     try:
29 |         return 1.0 - (delta[-1:].pop()/(sum(delta)*1.0))
30 |     except:
31 |         return 0.0
32 | 
33 | 
34 | def frequency(cpu='cpu0'):
35 |     """Retrieves the current CPU speed in MHz - for a single CPU"""
36 |     return float(open('/sys/devices/system/cpu/%s/cpufreq/scaling_cur_freq' % cpu,'r').read().strip())/1000.0
37 | 
38 | 
39 | def temperature():
40 |     """Retrieves the current CPU core temperature in degrees Celsius - tailored to the Raspberry Pi"""
41 |     return float(open('/sys/class/thermal/thermal_zone0/temp','r').read().strip())/1000.0
--------------------------------------------------------------------------------
/netstats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Copyright (c) 2013, Rui Carmo
6 | Description: Network utility functions
7 | License: MIT (see LICENSE.md for details)
8 | """
9 | 
10 | import re
11 | import logging
12 | import socket
13 | import struct
14 | import fcntl
15 | 
16 | log = logging.getLogger()
17 | 
18 | 
19 | def valid_mac_address(addr):
20 |     """Validate a physical Ethernet address"""
21 |     return re.match("[0-9a-f]{2}([-:][0-9a-f]{2}){5}$", addr.lower())
22 | 
23 | 
24 | def valid_ip_address(addr):
25 |     """Quick and dirty way to validate any kind of IP address"""
26 |     try:
27 |         socket.inet_aton(addr)
28 |         return True
29 |     except socket.error:
30 |         return False
31 | 
32 | 
33 | def get_net_bytes(dev='eth0'):
34 |     """Read network interface traffic counters"""
35 |     return {
36 |         'rx': float(open('/sys/class/net/%s/statistics/rx_bytes' % dev,'r').read().strip()),
37 |         'tx': float(open('/sys/class/net/%s/statistics/tx_bytes' % dev,'r').read().strip())
38 |     }
39 | 
40 | 
41 | def get_mac_address(dev="eth0"):
42 |     """Retrieves the MAC address from the /sys virtual filesystem - will only work on Linux."""
43 |     return open('/sys/class/net/%s/address' % dev,'r').read().strip()
44 | 
45 | 
46 | def get_ip_address(dev="eth0"):
47 |     """Retrieves the IP address via SIOCGIFADDR - only tested on Linux."""
48 |     try:
49 |         s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
50 |         return socket.inet_ntoa(fcntl.ioctl(s.fileno(),0x8915,struct.pack('256s', dev[:15]))[20:24])
51 |     except:
52 |         return None
--------------------------------------------------------------------------------
/stringkit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Copyright (c) 2012, Rui Carmo
6 | Description: String utility functions
7 | License: MIT (see LICENSE.md for details)
8 | """
9 | 
10 | import sys
11 | import re
12 | import logging
13 | import htmlentitydefs
14 | import unicodedata
15 | 
16 | log = logging.getLogger()
17 | 
18 | 
19 | def rsplit(s, sep=None, maxsplit=-1):
20 |     """Equivalent to str.split, except splitting from the right"""
21 |     if sys.version_info < (2, 4, 0):
22 |         if sep is not None:
23 |             sep = sep[::-1]
24 |         L = s[::-1].split(sep, maxsplit)
25 |         L.reverse()
26 |         return [s[::-1] for s in L]
27 |     else:
28 |         return s.rsplit(sep, maxsplit)
29 | 
30 | 
31 | def shrink(line, bound=50, rep='[...]'):
32 |     """Shrinks a string, adding an ellipsis to the middle"""
33 |     l = len(line)
34 |     if l < bound:
35 |         return line
36 |     if bound <= len(rep):
37 |         return rep
38 |     k = bound - len(rep)
39 |     return line[0:k / 2] + rep + line[-k / 2:]
40 | 
41 | 
42 | def convert_entity(m):
43 |     """Converts entities to codepoints where applicable"""
44 |     if m.group(1) == '#':
45 |         try:
46 |             return unichr(int(m.group(2)))
47 |         except ValueError:
48 |             return '&#%s;' % m.group(2)
49 |     try:
50 |         return unichr(htmlentitydefs.name2codepoint[m.group(2)])
51 |     except KeyError:
52 |         return '&%s;' % m.group(2)
53 | 
54 | 
55 | def convert_html(buffer):
56 |     """Replaces all entities with codepoints"""
57 |     return re.sub(r'&(#?)(.+?);', convert_entity, buffer)
58 | 
59 | 
60 | def munge_string(buffer):
61 |     """Builds anchor IDs"""
62 |     return re.sub("[\W+]", "-", buffer.lower())
63 | 
64 | 
65 | def remove_diacritics(buffer):
66 |     """Remove diacritical marks in Latin characters"""
67 |     return unicodedata.normalize('NFKD', unicode(buffer)).encode('ASCII', 'ignore')
--------------------------------------------------------------------------------
/procstats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Copyright (c) 2012, Rui Carmo
6 | Description: Utility functions for retrieving process information
7 | License: MIT (see LICENSE.md for details)
8 | """
9 | 
10 | import os
11 | import logging
12 | import platform
13 | import subprocess
14 | import __builtin__
15 | 
16 | log = logging.getLogger()
17 | 
18 | # Module globals
19 | openfiles = set()
20 | oldfile = __builtin__.file
21 | oldopen = __builtin__.open
22 | patched = False
23 | 
24 | class _file(oldfile):
25 |     """File wrapper"""
26 |     def __init__(self, *args):
27 |         self.x = args[0]
28 |         log.debug("FILE OPEN: %s" % str(self.x))
29 |         oldfile.__init__(self, *args)
30 |         openfiles.add(self)
31 | 
32 |     def close(self):
33 |         log.debug("FILE CLOSED: %s" % str(self.x))
34 |         oldfile.close(self)
35 |         openfiles.remove(self)
36 | 
37 | 
38 | def _open(*args):
39 |     return _file(*args)
40 | 
41 | 
42 | def monkeypatch_files():
43 |     """Wraps builtin file operations to allow us to track open files"""
44 |     global patched
45 |     __builtin__.file = _file
46 |     __builtin__.open = _open
47 |     patched = True
48 | 
49 | def get_open_fd_count():
50 |     if 'Darwin' in platform.platform():
51 |         pid = os.getpid()
52 |         procs = subprocess.check_output(["lsof", '-w', '-Ff', "-p", str(pid)])
53 |         nprocs = len(filter(lambda s: s and s[0] == 'f' and s[1:].isdigit(),procs.split('\n')))
54 |         return nprocs
55 |     # check if we've monkeypatched anything
56 |     if patched:
57 |         return len(get_open_files())
58 |     else:
59 |         # Will only work for Linux
60 |         return len(os.listdir('/proc/self/fd'))
61 | 
62 | 
63 | def get_open_files():
64 |     return [f.x for f in openfiles]
65 | 
66 | 
67 | def stats(pid):
68 |     """Retrieve process kernel counters"""
69 |     stats = open('/proc/%d/status' % pid,'r').readlines()
70 |     return dict(filter(lambda x: len(x)==2,map(lambda x: x.split()[:2],stats)))
71 | 
72 | 
73 | def
rss(pid): 74 | """Retrieve a process' resident set size""" 75 | try: 76 | return int(stats(pid)['VmRSS:']) 77 | except: 78 | return 0 79 | -------------------------------------------------------------------------------- /pipekit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: Pipeline patterns, mostly taken from itertools recipes 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import itertools 11 | import collections 12 | 13 | def chunk(chunk_size=32): 14 | """Group chunk_size elements into lists""" 15 | 16 | def chunker(gen): 17 | gen = iter(gen) 18 | chunk = [] 19 | try: 20 | while True: 21 | for _ in xrange(chunk_size): 22 | chunk.append(gen.next()) 23 | yield chunk 24 | chunk = [] 25 | except StopIteration: 26 | if chunk: 27 | yield chunk 28 | 29 | return chunker 30 | 31 | 32 | def flatten(gen): 33 | """Flatten a sequence, but only one level deep.""" 34 | 35 | return itertools.chain.from_iterable(gen) 36 | 37 | 38 | 39 | def sink(iter, steps=None): 40 | """Sink data from an iterator, effecting any results from it being consumed.""" 41 | 42 | if steps is None: 43 | # feed the entire iterator into a zero-length deque 44 | collections.deque(iter, maxlen=0) 45 | else: 46 | # advance to the empty slice starting at position 'steps' 47 | next(itertools.islice(iter, steps, steps), None) 48 | 49 | 50 | 51 | def make_unique(seq, transform=None): 52 | """Remove duplicate items from a sequence""" 53 | 54 | if transform is None: 55 | def transform(x): return x 56 | seen = {} 57 | for item in seq: 58 | marker = transform(item) 59 | if marker not in seen: 60 | seen[marker] = True 61 | yield item 62 | 63 | 64 | def pipeline(source, functions): 65 | """Apply an array of functions to a source iterable""" 66 | 67 | return reduce(lambda x, y: y(x), functions, source) 68 | 69 | 70 | if __name__=='__main__': 71 | 72 | def sum(iter): 73 | for i in iter: 74 | yield i + 1 75 | 76 | steps = [ 77 | sum, 78 | chunk(8), 79 | chunk(4) 80 | ] 81 | p = pipeline(xrange(64), steps) 82 | for i in p: 83 | print i 84 | 85 | -------------------------------------------------------------------------------- /markup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | Core classes 5 | 6 | Created by Rui Carmo on 2006-09-10. 7 | Published under the MIT license. 8 | """ 9 | 10 | import logging 11 | 12 | log = logging.getLogger() 13 | 14 | 15 | def sanitize_title(title): 16 | """Generate a usable anchor from a title string""" 17 | 18 | return re.sub("[\W+]","-",title.lower()) 19 | 20 | 21 | def parse_rfc822(buffer, mime_type='text/plain'): 22 | """Helper function for parsing metadata out of a plaintext buffer""" 23 | 24 | headers = {} 25 | markup = '' 26 | if mime_type in ['text/plain', 'text/x-textile', 'text/x-markdown']: 27 | try: 28 | (header_lines,markup) = buffer.split("\n\n", 1) 29 | for header in header_lines.strip().split("\n"): 30 | (name, value) = header.strip().split(":", 1) 31 | headers[name.lower().strip()] = unicode(value.strip()) 32 | if 'content-type' in headers: 33 | mime_type = headers['content-type'] 34 | except: 35 | raise TypeError, "Invalid file format." 
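    # (the metadata block is a set of "Name: value" lines separated from
    #  the markup body by a single blank line, RFC-2822 style)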
36 | return headers, markup, mime_type 37 | 38 | 39 | def render_markup(raw, markup=u'text/html'): 40 | """Turn markup into nice HTML""" 41 | 42 | # Allow module to load regardless of textile or markdown support 43 | try: 44 | import textile 45 | import smartypants 46 | import markdown 47 | except ImportError: 48 | pass 49 | 50 | def _markdown(raw): 51 | log.debug("Rendering Markdown") 52 | return markdown.Markdown(extensions=['extra','toc','smarty','codehilite','meta','sane_lists'], safe_mode=False).convert(raw) 53 | 54 | def _plaintext(raw): 55 | log.debug("Rendering plaintext") 56 | return u'
<pre>\n%s</pre>
' % raw 57 | 58 | def _textile(raw): 59 | log.debug("Rendering Textile") 60 | return smartypants.smartyPants(textile.textile(unicode(raw), head_offset=0, validate=0, sanitize=1, encoding='utf-8', output='utf-8')) 61 | 62 | def _html(raw): 63 | return raw 64 | 65 | return { 66 | u'text/plain' : _plaintext, 67 | u'text/x-web-markdown': _markdown, 68 | u'text/x-markdown' : _markdown, 69 | u'text/markdown' : _markdown, 70 | u'text/textile' : _textile, 71 | u'text/x-textile' : _textile, 72 | u'text/html' : _html}[markup](raw) 73 | 74 | -------------------------------------------------------------------------------- /filekit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: File utility functions 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | import zipfile 14 | 15 | log = logging.getLogger() 16 | 17 | def path_for(name, script=__file__): 18 | """Build absolute paths to resources based on app path""" 19 | 20 | if 'uwsgi' in sys.argv: 21 | return os.path.join(os.path.abspath(os.path.join(os.path.dirname(script),'..')),name) 22 | return os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]),name)) 23 | 24 | 25 | def locate(pattern, root=os.getcwd()): 26 | """Generator for iterating inside a file tree""" 27 | 28 | for path, dirs, files in os.walk(root): 29 | for filename in [os.path.abspath(os.path.join(path, filename)) for filename in files if fnmatch.fnmatch(filename, pattern)]: 30 | yield filename 31 | 32 | 33 | def walk(top, topdown=True, onerror=None, followlinks=False, ziparchive=None, zipdepth=0): 34 | """Reimplementation of os.walk to traverse ZIP files as well""" 35 | 36 | try: 37 | if (os.path.splitext(top)[1]).lower() == '.zip': 38 | if ziparchive: 39 | # skip nested ZIPs. 
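                # (the nested archive is reported as a plain leaf entry
                #  instead of being opened recursively)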
40 | yield top, [], [] 41 | else: 42 | ziparchive = zipfile.ZipFile(top) 43 | names = list(set(map(lambda x: [p+'/' for p in x.split('/') if p != ""][zipdepth],ziparchive.namelist()))) 44 | else: 45 | names = os.listdir(top) 46 | except error, err: 47 | if onerror is not None: 48 | onerror(err) 49 | return 50 | 51 | dirs, nondirs = [], [] 52 | if ziparchive: 53 | for name in names: 54 | if name == '__MACOSX/': 55 | continue 56 | if name[-1::] == '/': 57 | dirs.append(name) 58 | else: 59 | nondirs.append(name) 60 | else: 61 | for name in names: 62 | if os.path.isdir(os.path.join(top, name)): 63 | dirs.append(name) 64 | else: 65 | nondirs.append(name) 66 | if topdown: 67 | yield top, dirs, nondirs 68 | for name in dirs: 69 | new_path = os.path.join(top, name) 70 | if ziparchive: 71 | for x in walk(new_path, topdown, onerror, followlinks): 72 | yield x 73 | else: 74 | if followlinks or not islink(new_path): 75 | for x in walk(new_path, topdown, onerror, followlinks): 76 | yield x 77 | if not topdown: 78 | yield top, dirs, nondirs -------------------------------------------------------------------------------- /favicon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Favicon retrieval 5 | 6 | Created by: Rui Carmo 7 | License: MIT (see LICENSE for details) 8 | """ 9 | import logging 10 | 11 | log = logging.getLogger() 12 | 13 | import urlparse 14 | from utils.urlkit import fetch, data_uri 15 | from bs4 import BeautifulSoup 16 | 17 | _default = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAAxlBMVEUAAABOWZ5BTZhCTZhHUpt7g7d5gbZ5grZ5grZ6grZsda9sdq9tdq9tdrBtd7Bye7JxerJye7JzfLN0fbNdaKdeaadfaahfaqhha6ldZ6dfaahfaqhjbat3gLV6grZ6grd8hLh/h7mAh7mFjLxfaahgaqlha6libKpjbapRXKBSXKBSXaFTXqFUX6KNmcKXo8idqcujrs6uuNWzvdi5wtu+x96/x97EzOHJ0eXQ1ufV2+vb4O/g5fHm6vXr7/fx9Pv8/f////8y4F8aAAAALnRSTlMACR0dI1BRUVJSiIiIiIi8vb29vdbW1tbW4uLi4uzs7Ozs7Ozx8fHx8f39/f39FstVagAAALBJREFUGBllwUFOw0AMQNFve6Yhk6RFAhZsev9rwRap6iKZtp4kRrCE9+APAZGuvGX8q3oEhtgwHUexYVP2wNByei025qdx8LaF0U1noGWTdlq2VSmlhwgjNht6jPNLcpgU5HGUSyIn1UNWkEbKKCiDBz+EIOGedKpwSOP2aBixP4Pd9hZZP653ZZkrvzzqrWIE3mfRld4/Zw9BrCv9e3hcl+pbGMTaQvb1fpnXPfjnG2UzUabhPViuAAAAAElFTkSuQmCC" 18 | 19 | def google_fetcher(site): 20 | """Fetch the favicon via Google services""" 21 | endpoint = "http://www.google.com/s2/favicons?domain=%s" % urlparse.urlparse(site).hostname 22 | try: 23 | res = fetch(endpoint) 24 | except Exception, e: 25 | log.error("could not fetch %s: %s" % (endpoint, e)) 26 | return None 27 | return data_uri(res['content-type'], res['data']) 28 | 29 | 30 | def dumb_fetcher(site): 31 | """Fetch the favicon the dumb way""" 32 | endpoint = "http://%s/favicon.ico" % urlparse.urlparse(site).hostname 33 | try: 34 | res = fetch(endpoint) 35 | except Exception, e: 36 | log.error("could not fetch %s: %s" % (endpoint, e)) 37 | return None 38 | return data_uri(res['content-type'], res['data']) 39 | 40 | 41 | def html_fetcher(site): 42 | """Fetch the favicon the hard way""" 43 | endpoint = "http://%s" % urlparse.urlparse(site).hostname 44 | try: 45 | res = fetch(endpoint) 46 | except Exception, e: 47 | log.error("Could not fetch %s: %s" % (endpoint, e)) 48 | return None 49 | 50 | try: 51 | soup = BeautifulSoup(res['data']) 52 | except Exception, e: 53 | log.error("Could not parse %s: %s" % (endpoint, e)) 54 | return None 55 | 56 | link = soup.find("link", rel="shortcut icon") 57 | if not link: 58 | return None 59 | url = link['href'] 60 | try: 61 | res = 
fetch(url) 62 | except Exception, e: 63 | log.error("could not fetch %s: %s" % (endpoint, e)) 64 | return None 65 | return data_uri(res['content-type'], res['data']) 66 | 67 | 68 | def fetch_anyway(site): 69 | global _default 70 | data = None 71 | for handler in [google_fetcher,dumb_fetcher,html_fetcher]: 72 | data = handler(site) 73 | if data: 74 | return data 75 | return _default 76 | -------------------------------------------------------------------------------- /core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: Core utility functions 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | import json 14 | 15 | log = logging.getLogger() 16 | 17 | from filekit import path_for 18 | 19 | class Singleton(type): 20 | """An implemetation of the Singleton pattern (use as metaclass)""" 21 | 22 | _instances = {} 23 | 24 | def __call__(cls, *args, **kwargs): 25 | if cls not in cls._instances: 26 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 27 | return cls._instances[cls] 28 | 29 | 30 | class Struct(dict): 31 | """An object that recursively builds itself from a dict and allows easy access to attributes""" 32 | 33 | def __init__(self, obj): 34 | dict.__init__(self, obj) 35 | for k, v in obj.iteritems(): 36 | if isinstance(v, dict): 37 | self.__dict__[k] = Struct(v) 38 | else: 39 | self.__dict__[k] = v 40 | 41 | def __getattr__(self, attr): 42 | try: 43 | return self.__dict__[attr] 44 | except KeyError: 45 | raise AttributeError(attr) 46 | 47 | def __setitem__(self, key, value): 48 | super(Struct, self).__setitem__(key, value) 49 | self.__dict__[key] = value 50 | 51 | def __setattr__(self, attr, value): 52 | self.__setitem__(attr, value) 53 | 54 | 55 | def json_str(item, bind_env=True): 56 | """Helper function to cast JSON unicode data to plain str and bind environment variables""" 57 | 58 | if isinstance(item, dict): 59 | return {json_str(key,bind_env=bind_env): json_str(value,bind_env=bind_env) for key, value in item.iteritems()} 60 | elif isinstance(item, list): 61 | return [json_str(element, bind_env=bind_env) for element in item] 62 | elif isinstance(item, unicode) and bind_env: 63 | env = os.environ 64 | env.update({"APPLICATION_ROOT": path_for('')}) 65 | try: 66 | return item.encode('utf-8') % env 67 | except: 68 | return item.encode('utf-8') 69 | else: 70 | return item 71 | 72 | 73 | def get_config(filename=None): 74 | """Parses a configuration file and returns a Struct for managing the configuration""" 75 | 76 | if not filename: 77 | return Struct({}) 78 | return Struct(json.load(open(filename, 'r'),object_hook=json_str)) 79 | 80 | 81 | def safe_eval(buffer): 82 | """Perform safe evaluation of a (very) small subset of Python functions""" 83 | 84 | if '%' == buffer[0]: 85 | try: 86 | return eval(buffer[1:],{"__builtins__":None},{"environ":os.environ}) 87 | except Exception, e: 88 | log.error('Error %s while doing safe_eval of %s' % (e, buffer)) 89 | return None 90 | return buffer 91 | 92 | 93 | def tb(): 94 | """Return a concise traceback summary""" 95 | 96 | etype, value, tb = sys.exc_info() 97 | return "%s: %s (%s@%s:%d)" % (etype.__name__, value, tb.tb_frame.f_code.co_name, os.path.basename(tb.tb_frame.f_code.co_filename), tb.tb_lineno) 98 | -------------------------------------------------------------------------------- /datakit.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2013, Rui Carmo 6 | Description: Clustering and statistics helpers 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import logging 11 | import re 12 | 13 | log = logging.getLogger() 14 | 15 | _stopwords = {"en":"i,a,an,are,as,at,be,by,for,from,how,in,is,it,of,on,or,that,the,this,to,was,what,when,where".split(',')} 16 | 17 | 18 | def strip_stopwords(sentence, lang="en"): 19 | """Removes stopwords and normalizes whitespace - adapted from Django""" 20 | 21 | global _stopwords 22 | words = sentence.split() 23 | sentence = [] 24 | for word in words: 25 | if word.lower() not in _stopwords[lang]: 26 | sentence.append(word) 27 | return u' '.join(sentence) 28 | 29 | 30 | def jaccard_distance(a, b): 31 | """A simple distance function based on string overlap - adapted from sample code by Deepak Thukral""" 32 | #Tokenize string into bag of words 33 | feature1 = set(re.findall('\w+', strip_stopwords(a.lower()))[:100]) 34 | feature2 = set(re.findall('\w+', strip_stopwords(b.lower()))[:100]) 35 | similarity = 1.0 * len(feature1.intersection(feature2)) / len(feature1.union(feature2)) 36 | return 1 - similarity 37 | 38 | 39 | 40 | def levenshtein_distance(a, b, limit=None): 41 | """Returns the Levenshtein edit distance between two strings - adapted from Whoosh""" 42 | 43 | a = ''.join(re.findall('\w+', strip_stopwords(a.lower()))) 44 | b = ''.join(re.findall('\w+', strip_stopwords(b.lower()))) 45 | 46 | prev = None 47 | thisrow = range(1, len(b) + 1) + [0] 48 | for x in xrange(len(a)): 49 | # Python lists wrap around for negative indices, so put the 50 | # leftmost column at the *end* of the list. This matches with 51 | # the zero-indexed strings and saves extra calculation. 52 | prev, thisrow = thisrow, [0] * len(b) + [x + 1] 53 | for y in xrange(len(b)): 54 | delcost = prev[y] + 1 55 | addcost = thisrow[y - 1] + 1 56 | subcost = prev[y - 1] + (a[x] != b[y]) 57 | thisrow[y] = min(delcost, addcost, subcost) 58 | 59 | if limit and x > limit and min(thisrow) > limit: 60 | return limit + 1 61 | 62 | return thisrow[len(b) - 1] 63 | 64 | 65 | def damerau_levenshtein_distance(a, b, limit=None): 66 | """Returns the Damerau-Levenshtein edit distance between two strings - adapted from Whoosh""" 67 | 68 | a = ''.join(re.findall('\w+', strip_stopwords(a.lower()))) 69 | b = ''.join(re.findall('\w+', strip_stopwords(b.lower()))) 70 | 71 | oneago = None 72 | thisrow = list(range(1, len(b) + 1)) + [0] 73 | for x in xrange(len(a)): 74 | # Python lists wrap around for negative indices, so put the 75 | # leftmost column at the *end* of the list. This matches with 76 | # the zero-indexed strings and saves extra calculation. 
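        # 'twoago' keeps the row from two iterations back, which the
        # transposition check below needs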
77 | twoago, oneago, thisrow = oneago, thisrow, [0] * len(b) + [x + 1] 78 | for y in xrange(len(b)): 79 | delcost = oneago[y] + 1 80 | addcost = thisrow[y - 1] + 1 81 | subcost = oneago[y - 1] + (a[x] != b[y]) 82 | thisrow[y] = min(delcost, addcost, subcost) 83 | # This block deals with transpositions 84 | if (x > 0 and y > 0 and a[x] == b[y - 1] 85 | and a[x - 1] == b[y] and a[x] != b[y]): 86 | thisrow[y] = min(thisrow[y], twoago[y - 2] + 1) 87 | 88 | if limit and x > limit and min(thisrow) > limit: 89 | return limit + 1 90 | 91 | return thisrow[len(b) - 1] 92 | -------------------------------------------------------------------------------- /logkit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: Utility functions 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import json 11 | import logging 12 | import os 13 | 14 | log = logging.getLogger() 15 | 16 | from collections import deque 17 | 18 | from pygments import highlight 19 | from pygments.lexers import get_lexer_by_name, guess_lexer 20 | from pygments.formatters import TerminalFormatter, Terminal256Formatter, NullFormatter 21 | 22 | 23 | class InMemoryHandler(logging.Handler): 24 | """In memory logging handler with a circular buffer""" 25 | 26 | def __init__(self, limit=8192): 27 | # run the regular Handler __init__ 28 | logging.Handler.__init__(self) 29 | # Our custom argument 30 | self.limit = limit 31 | self.flush() 32 | 33 | def emit(self, record): 34 | self.records.append(self.format(record)) 35 | 36 | def flush(self): 37 | self.records = deque([], self.limit) 38 | 39 | def dump(self): 40 | return self.records 41 | 42 | 43 | class ColorFormatter(logging.Formatter) : 44 | """Console logging formatter with coloring""" 45 | _colors = { 46 | "DEBUG" : "\033[22;32m", # green 47 | "INFO" : "\033[01;34m", # violet 48 | "WARNING" : "\033[22;35m", # magenta 49 | "ERROR" : "\033[22;31m", # red 50 | "CRITICAL": "\033[01;31m" # bold red 51 | }; 52 | 53 | def format(self, record): 54 | if 'color' in os.environ.get('TERM', ''): 55 | if(self._colors.has_key(record.levelname)): 56 | record.levelname = "%s%s\033[0;0m" % (self._colors[record.levelname], record.levelname) 57 | record.msg = "\033[37m\033[1m%s\033[0;0m" % record.msg 58 | return logging.Formatter.format(self, record) 59 | 60 | 61 | class PygmentsHandler(logging.StreamHandler): 62 | """Console logging handler with syntax highlighting""" 63 | 64 | def __init__(self, stream=None, syntax="guess", encoding='utf-8', style='default'): 65 | # run the regular Handler __init__ 66 | logging.StreamHandler.__init__(self,stream) 67 | self.pformatter = (Terminal256Formatter(encoding=encoding, style=style) 68 | if '256color' in os.environ.get('TERM', '') 69 | else TerminalFormatter(encoding=encoding,style=style)) 70 | if not stream.isatty(): 71 | self.pformatter = NullFormatter 72 | if syntax == "guess": 73 | self.lexer = guess_lexer 74 | else: 75 | self.lexer = get_lexer_by_name(syntax) 76 | 77 | def emit(self, record): 78 | if self.pformatter == NullFormatter: 79 | return 80 | msg = self.format(record) 81 | # Note that the guessing also applies to any log formatting 82 | if self.lexer == guess_lexer: 83 | lexer = guess_lexer(msg) 84 | self.stream.write(highlight(msg,lexer,self.pformatter)) 85 | return 86 | self.stream.write(highlight(msg,self.lexer,self.pformatter)) 87 | 88 | 89 | def json_ansi(item, stream, sort_keys=True, indent=0, 
separators=(',', ':'), encoding='utf-8', style='default'):
90 |     """Helper function to pretty-print JSON via Pygments"""
91 | 
92 |     formatter = (Terminal256Formatter(encoding=encoding,style=style)
93 |                  if '256color' in os.environ.get('TERM', '')
94 |                  else TerminalFormatter(encoding=encoding,style=style))
95 |     if not stream.isatty():
96 |         formatter = NullFormatter
97 |     lexer = get_lexer_by_name('json')
98 |     stream.write(highlight(json.dumps(item, sort_keys=sort_keys, indent=indent, separators=separators),lexer,formatter))
99 | 
--------------------------------------------------------------------------------
/imagekit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Image utilities
5 | 
6 | Created by: Rui Carmo
7 | License: MIT (see LICENSE for details)
8 | """
9 | 
10 | import struct
11 | import StringIO
12 | from operator import itemgetter
13 | 
14 | def linear_partition(seq, k):
15 |     if k <= 0:
16 |         return []
17 |     n = len(seq) - 1
18 |     if k > n:
19 |         return map(lambda x: [x], seq)
20 |     table, solution = linear_partition_table(seq, k)
21 |     k, ans = k-2, []
22 |     while k >= 0:
23 |         ans = [[seq[i] for i in xrange(solution[n-1][k]+1, n+1)]] + ans
24 |         n, k = solution[n-1][k], k-1
25 |     return [[seq[i] for i in xrange(0, n+1)]] + ans
26 | 
27 | def linear_partition_table(seq, k):
28 |     n = len(seq)
29 |     table = [[0] * k for x in xrange(n)]
30 |     solution = [[0] * (k-1) for x in xrange(n-1)]
31 |     for i in xrange(n):
32 |         table[i][0] = seq[i] + (table[i-1][0] if i else 0)
33 |     for j in xrange(k):
34 |         table[0][j] = seq[0]
35 |     for i in xrange(1, n):
36 |         for j in xrange(1, k):
37 |             table[i][j], solution[i-1][j-1] = min(
38 |                 ((max(table[x][j-1], table[i][0]-table[x][0]), x) for x in xrange(i)),
39 |                 key=itemgetter(0))
40 |     return (table, solution)
41 | 
42 | def get_info(data):
43 |     """Parses a small buffer and attempts to return basic image metadata"""
44 | 
45 |     data = str(data)
46 |     size = len(data)
47 |     height = -1
48 |     width = -1
49 |     content_type = ''
50 | 
51 |     # handle GIFs
52 |     if (size >= 10) and data[:6] in ('GIF87a', 'GIF89a'):
53 |         # Check to see if content_type is correct
54 |         content_type = 'image/gif'
55 |         w, h = struct.unpack("<HH", data[6:10])
56 |         width = int(w)
57 |         height = int(h)
58 | 
59 |     # See PNG 2. Edition spec (http://www.w3.org/TR/PNG/)
60 |     # Bytes 0-7 are the signature, then a 4-byte chunk length, 'IHDR',
61 |     # and finally the 4-byte width and height
62 |     elif ((size >= 24) and data.startswith('\211PNG\r\n\032\n')
63 |           and (data[12:16] == 'IHDR')):
64 |         content_type = 'image/png'
65 |         w, h = struct.unpack(">LL", data[16:24])
66 |         width = int(w)
67 |         height = int(h)
68 | 
69 |     # Maybe this is for an older PNG version.
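    # (some pre-standard encoders wrote the 4-byte width and height
    #  straight after the 8-byte signature, without an IHDR chunk)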
70 | elif (size >= 16) and data.startswith('\211PNG\r\n\032\n'): 71 | # Check to see if we have the right content type 72 | content_type = 'image/png' 73 | w, h = struct.unpack(">LL", data[8:16]) 74 | width = int(w) 75 | height = int(h) 76 | 77 | # Check for a JPEG 78 | elif (size >= 4): 79 | jpeg = StringIO.StringIO(data) 80 | b = jpeg.read(4) 81 | if b.startswith('\xff\xd8\xff\xe0'): 82 | content_type = 'image/jpeg' 83 | bs = jpeg.tell() 84 | b = jpeg.read(2) 85 | bl = (ord(b[0]) << 8) + ord(b[1]) 86 | b = jpeg.read(4) 87 | if b.startswith("JFIF"): 88 | bs += bl 89 | while(bs < len(data)): 90 | jpeg.seek(bs) 91 | b = jpeg.read(4) 92 | bl = (ord(b[2]) << 8) + ord(b[3]) 93 | if bl >= 7 and b[0] == '\xff' and b[1] == '\xc0': 94 | jpeg.read(1) 95 | b = jpeg.read(4) 96 | height = (ord(b[0]) << 8) + ord(b[1]) 97 | width = (ord(b[2]) << 8) + ord(b[3]) 98 | break 99 | bs = bs + bl + 2 100 | return width, height, content_type 101 | -------------------------------------------------------------------------------- /timekit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: Utility functions for handling date and time information 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import time 11 | import math 12 | import re 13 | import logging 14 | import datetime 15 | import gettext 16 | 17 | gettext.textdomain('date') 18 | _ = gettext.gettext 19 | 20 | log = logging.getLogger() 21 | 22 | 23 | # Embrace and extend Mark's feedparser mechanism 24 | 25 | _textmate_date_re = \ 26 | re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})$') 27 | 28 | 29 | def parse_date(date): 30 | """Parse a TextMate date (YYYY-MM-DD HH:MM:SS, no time zone, assume it's always localtime)""" 31 | 32 | m = _textmate_date_re.match(date) 33 | try: 34 | from feedparser import _parse_date 35 | if not m: 36 | return time.mktime(_parse_date(date)) 37 | except: 38 | pass 39 | 40 | return time.mktime(time.localtime(calendar.timegm(time.gmtime(time.mktime(time.strptime(date, 41 | '%Y-%m-%d %H:%M:%S')))))) 42 | 43 | 44 | def iso_time(value=None): 45 | """Format a timestamp in ISO format""" 46 | 47 | if value == None: 48 | value = time.localtime() 49 | tz = time.timezone / 3600 50 | return time.strftime('%Y-%m-%dT%H:%M:%S-', value) + '%(tz)02d:00' \ 51 | % vars() 52 | 53 | 54 | def http_time(value=None): 55 | """Format a timestamp for HTTP headers""" 56 | 57 | if value == None: 58 | value = time.time() 59 | return time.strftime('%a, %d %b %Y %H:%M:%S GMT', 60 | time.gmtime(value)) 61 | 62 | 63 | def plain_date(date, rss=False): 64 | """Format a date consistently""" 65 | 66 | if isinstance(date, float) or isinstance(date, int): 67 | date = time.localtime(date) 68 | 69 | # trickery to replace leading zero in month day 70 | 71 | mday = time.strftime(' %d', date).replace(' 0', ' ').strip() 72 | weekday = _(time.strftime('%A', date)) 73 | month = _(time.strftime('%b', date)) 74 | year = time.strftime('%Y', date) 75 | 76 | # build English ordinal suffixes 77 | 78 | day = int(mday) 79 | if day > 20: 80 | day = int(mday[1]) 81 | try: 82 | suffix = ['th', 'st', 'nd', 'rd'][day] 83 | except: 84 | suffix = 'th' 85 | if rss: 86 | return _('rss_update_date_format') % locals() 87 | else: 88 | return _('journal_date_format') % locals() 89 | 90 | 91 | def fuzzy_time(date=None): 92 | intervals = { 93 | '00:00-00:59': 'latenight', 94 | '01:00-03:59': 'weehours', 95 | '04:00-06:59': 'dawn', 
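        # keys are local-time ranges (HH:MM); values are gettext message ids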
96 | '07:00-08:59': 'breakfast', 97 | '09:00-12:29': 'morning', 98 | '12:30-14:29': 'lunchtime', 99 | '14:30-16:59': 'afternoon', 100 | '17:00-17:29': 'teatime', 101 | '17:30-18:59': 'lateafternoon', 102 | '19:00-20:29': 'evening', 103 | '20:30-21:29': 'dinnertime', 104 | '21:30-22:29': 'night', 105 | '22:30-23:59': 'latenight', 106 | } 107 | if isinstance(date, float) or isinstance(date, int): 108 | date = time.localtime(date) 109 | then = time.strftime('%H:%M', date) 110 | for i in intervals.keys(): 111 | (l, u) = i.split('-') 112 | # cheesy (but perfectly usable) string comparison 113 | if l <= then and then <= u: 114 | return _(intervals[i]) 115 | return None 116 | 117 | 118 | def relative_time(value=None, addtime=False): 119 | """ 120 | A simple time string 121 | """ 122 | 123 | value = float(value) 124 | if addtime: 125 | format = ', %H:%M' 126 | else: 127 | format = '' 128 | if time.localtime(value)[0] != time.localtime()[0]: 129 | 130 | # we have a different year 131 | 132 | format = ' %Y' + format 133 | format = time.strftime('%b', time.localtime(value)) + ' %d' \ 134 | + format 135 | return time.strftime(format, time.localtime(value)).strip() 136 | 137 | 138 | def time_since(older=None, newer=None, detail=2): 139 | """ 140 | Human-readable time strings, based on Natalie Downe's code from 141 | http://blog.natbat.co.uk/archive/2003/Jun/14/time_since 142 | Assumes time parameters are in seconds 143 | """ 144 | 145 | intervals = { # corrected from the initial 31536000 146 | 31556926: 'year', 147 | 2592000: 'month', 148 | 604800: 'week', 149 | 86400: 'day', 150 | 3600: 'hour', 151 | 60: 'minute', 152 | } 153 | chunks = intervals.keys() 154 | 155 | # Reverse sort using a lambda for Python 2.3 compatibility 156 | chunks.sort(lambda x, y: y - x) 157 | 158 | if newer == None: 159 | newer = time.time() 160 | 161 | interval = newer - older 162 | if interval < 0: 163 | return _('some_time') 164 | 165 | # We should ideally do this: 166 | # raise ValueError('Time interval cannot be negative') 167 | # but it makes sense to fail gracefully here 168 | 169 | if interval < 60: 170 | return _('less_1min') 171 | 172 | output = '' 173 | for steps in range(detail): 174 | for seconds in chunks: 175 | count = math.floor(interval / seconds) 176 | unit = intervals[seconds] 177 | if count != 0: 178 | break 179 | if count > 1: 180 | unit = unit + 's' 181 | if count != 0: 182 | output = output + '%d %s, ' % (count, _(unit)) 183 | interval = interval - count * seconds 184 | output = output[:-2] 185 | return output 186 | 187 | 188 | def datetime_to_epoch(dt): 189 | epoch = datetime.datetime.utcfromtimestamp(0) 190 | delta = dt - epoch 191 | return delta.total_seconds() 192 | -------------------------------------------------------------------------------- /taskkit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2012, Rui Carmo 6 | Description: In-process job management 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | from Queue import Empty, Queue, PriorityQueue 11 | from collections import defaultdict 12 | from functools import partial 13 | from signal import signal, SIGINT, SIGTERM, SIGHUP 14 | import sys, logging 15 | from threading import Semaphore, Thread 16 | import time, traceback, ctypes 17 | from uuid import uuid4 18 | from cPickle import dumps, loads 19 | import multiprocessing 20 | 21 | log = logging.getLogger(__name__) 22 | 23 | default_priority = 0 24 | max_workers = 
multiprocessing.cpu_count() * 2 25 | channels = {} 26 | closed = {} 27 | 28 | class Pool: 29 | """Represents a thread pool""" 30 | 31 | def __init__(self, workers = max_workers, rate_limit = 1000): 32 | self.max_workers = workers 33 | self.mutex = Semaphore() 34 | self.results = {} 35 | self.retries = defaultdict(int) 36 | self.queue = PriorityQueue() 37 | self.threads = [] 38 | self.rate_limit = rate_limit 39 | self.running = True 40 | 41 | def _tick(self): 42 | time.sleep(1.0/self.rate_limit) 43 | # clean up finished threads 44 | self.threads = [t for t in self.threads if t.isAlive()] 45 | return (not self.queue.empty()) or (len(self.threads) > 0) 46 | 47 | 48 | def _loop(self): 49 | """Handle task submissions""" 50 | 51 | def run_task(priority, f, uuid, retries, args, kwargs): 52 | """Run a single task""" 53 | try: 54 | t.name = getattr(f, '__name__', None) 55 | result = f(*args, **kwargs) 56 | except Exception as e: 57 | # Retry the task if applicable 58 | if log: 59 | log.error(traceback.format_exc()) 60 | if retries > 0: 61 | with self.mutex: 62 | self.retries[uuid] += 1 63 | # re-queue the task with a lower (i.e., higher-valued) priority 64 | self.queue.put((priority+1, dumps((f, uuid, retries - 1, args, kwargs)))) 65 | self.queue.task_done() 66 | return 67 | result = e 68 | with self.mutex: 69 | self.results[uuid] = dumps(result) 70 | self.retries[uuid] += 1 71 | self.queue.task_done() 72 | 73 | while self._tick(): 74 | # spawn more threads to fill free slots 75 | log.debug("Running %d/%d threads" % (len(self.threads),self.max_workers)) 76 | if self.running and len(self.threads) < self.max_workers: 77 | log.debug("Queue Length: %d" % self.queue.qsize()) 78 | try: 79 | priority, data = self.queue.get(True, 1.0/self.rate_limit) 80 | except Empty: 81 | continue 82 | f, uuid, retries, args, kwargs = loads(data) 83 | log.debug(f) 84 | t = Thread(target=run_task, args=[priority, f, uuid, retries, args, kwargs]) 85 | t.setDaemon(True) 86 | self.threads.append(t) 87 | t.start() 88 | log.debug("Exited loop.") 89 | for t in self.threads: 90 | t.join() 91 | 92 | 93 | def kill_all(self): 94 | """Very hacky way to kill threads by tossing an exception into their state""" 95 | for t in self.threads: 96 | ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(t.ident), ctypes.py_object(SystemExit)) 97 | 98 | 99 | def stop(self): 100 | """Flush the job queue""" 101 | self.running = False 102 | self.queue = PriorityQueue() 103 | 104 | 105 | def start(self, daemonize=False): 106 | """Pool entry point""" 107 | 108 | self.results = {} 109 | self.retries = defaultdict(int) 110 | 111 | if daemonize: 112 | t = Thread(target = self._loop, args=[self]) 113 | t.setDaemon(True) 114 | t.start() 115 | return 116 | else: 117 | self._loop() 118 | 119 | 120 | default_pool = Pool() 121 | 122 | class Deferred(object): 123 | """Allows lookup of task results and status""" 124 | def __init__(self, pool, uuid): 125 | self.uuid = uuid 126 | self.pool = pool 127 | self._result = None 128 | 129 | @property 130 | def result(self): 131 | if self._result is None: 132 | with self.pool.mutex: 133 | if self.uuid in self.pool.results.keys(): 134 | self._result = loads(self.pool.results[self.uuid]) 135 | return self._result 136 | 137 | @property 138 | def retries(self): 139 | return self.pool.retries[self.uuid] 140 | 141 | 142 | def task(func=None, pool=None, max_retries=0, priority=default_priority): 143 | """Task decorator - setus up a .delay() attribute in the task function""" 144 | 145 | if func is None: 146 | return 
partial(task, pool=pool, max_retries=max_retries) 147 | 148 | if pool is None: 149 | pool = default_pool 150 | 151 | def delay(*args, **kwargs): 152 | uuid = str(uuid4()) # one for each task 153 | pool.queue.put((priority,dumps((func, uuid, max_retries, args, kwargs)))) 154 | return Deferred(pool, uuid) 155 | func.delay = delay 156 | func.pool = pool 157 | return func 158 | 159 | 160 | def go(*args, **kwargs): 161 | """Queue up a function, Go-style""" 162 | uuid = str(uuid4()) # one for each task 163 | default_pool.queue.put((default_priority,dumps((args[0], uuid, 0, args[1:], kwargs)))) 164 | return Deferred(default_pool, uuid) 165 | 166 | 167 | class Channel: 168 | """A serializable shim that proxies to a Queue object""" 169 | def __init__(self, size): 170 | self.uuid = str(uuid4()) # one for each task 171 | channels[self.uuid] = Queue(size) 172 | 173 | def recv(self): 174 | return channels[self.uuid].get() 175 | 176 | def send(self, item): 177 | if self.uuid in closed: 178 | raise RuntimeError("Channel is closed.") 179 | channels[self.uuid].put(item) 180 | 181 | def close(self): 182 | closed[self.uuid] = True 183 | 184 | def __iter__(self): 185 | yield self.recv() 186 | while True: 187 | try: 188 | res = channels[self.uuid].get(True, 1.0/default_pool.rate_limit) 189 | yield res 190 | except Empty: 191 | # check channel again and end iteration if closed 192 | if channels[self.uuid].empty() and (self.uuid in closed): 193 | return 194 | 195 | 196 | def chan(size = 0): 197 | """Return a shim that acts like a Go channel""" 198 | return Channel(size) 199 | 200 | 201 | def halt(signal, frame): 202 | default_pool.stop() 203 | default_pool.kill_all() 204 | sys.exit() 205 | 206 | 207 | def start(daemonize = False): 208 | signal(SIGINT, halt) 209 | signal(SIGTERM, halt) 210 | default_pool.start(daemonize = daemonize) 211 | -------------------------------------------------------------------------------- /decorators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Decorator functions 5 | 6 | Created by: Rui Carmo 7 | """ 8 | 9 | from bottle import request, response, route, abort 10 | import time, binascii, hashlib, email.utils, functools, json, cProfile, collections 11 | from datetime import datetime 12 | import logging 13 | from core import tb 14 | 15 | # Allow importing even when Redis bindings aren't present 16 | try: 17 | from redis import StrictRedis as Redis 18 | except ImportError: 19 | pass 20 | 21 | log = logging.getLogger() 22 | 23 | gmt_format_string = "%a, %d %b %Y %H:%M:%S GMT" 24 | 25 | 26 | class CustomEncoder(json.JSONEncoder): 27 | """Custom encoder that serializes datetimes into JS-compliant times""" 28 | 29 | def default(self, obj): 30 | if isinstance(obj, datetime): 31 | epoch = datetime.utcfromtimestamp(0) 32 | delta = obj - epoch 33 | return int(delta.total_seconds()) * 1000 34 | return json.JSONEncoder.default(self, obj) 35 | 36 | 37 | def cache_redis(r, prefix='url', ttl=3600): 38 | """Cache route results in Redis""" 39 | 40 | def decorator(callback): 41 | @functools.wraps(callback) 42 | def wrapper(*args, **kwargs): 43 | try: 44 | item = json.loads(r.get('%s:%s' % (prefix,request.urlparts.path))) 45 | body = item['body'] 46 | for h in item['headers']: 47 | response.set_header(str(h), item['headers'][h]) 48 | response.set_header('X-Source', 'Redis') 49 | except Exception as e: 50 | log.debug("Redis cache miss for %s" % request.urlparts.path) 51 | body = callback(*args, 
**kwargs) 52 | item = { 53 | 'body': body, 54 | 'headers': dict(response.headers), 55 | 'mtime': int(time.time()) 56 | } 57 | k = '%s:%s' % (prefix, request.urlparts.path) 58 | r.set(k, json.dumps(item)) 59 | r.expire(k, ttl) 60 | return body 61 | return wrapper 62 | return decorator 63 | 64 | 65 | def cache_results(timeout=0): 66 | """Cache route results for a given period of time""" 67 | 68 | def decorator(callback): 69 | _cache = {} 70 | _times = {} 71 | 72 | @functools.wraps(callback) 73 | def wrapper(*args, **kwargs): 74 | 75 | def expire(when): 76 | for t in [k for k in _times.keys()]: 77 | if (when - t) > timeout: 78 | del(_cache[_times[t]]) 79 | del(_times[t]) 80 | 81 | now = time.time() 82 | try: 83 | item = _cache[request.urlparts] 84 | if 'If-Modified-Since' in request.headers: 85 | try: 86 | since = time.mktime(email.utils.parsedate(request.headers['If-Modified-Since'])) 87 | except: 88 | since = now 89 | if item['mtime'] >= since: 90 | expire(now) 91 | abort(304,'Not modified') 92 | for h in item['headers']: 93 | response.set_header(str(h), item['headers'][h]) 94 | body = item['body'] 95 | response.set_header('X-Source', 'Worker Cache') 96 | except KeyError: 97 | body = callback(*args, **kwargs) 98 | item = { 99 | 'body': body, 100 | 'headers': response.headers, 101 | 'mtime': int(now) 102 | } 103 | _cache[request.urlparts] = item 104 | _times[now] = request.urlparts 105 | 106 | expire(now) 107 | return body 108 | return wrapper 109 | return decorator 110 | 111 | 112 | def cache_control(seconds = 0): 113 | """Insert HTTP caching headers""" 114 | 115 | def decorator(callback): 116 | @functools.wraps(callback) 117 | def wrapper(*args, **kwargs): 118 | expires = int(time.time() + seconds) 119 | expires = time.strftime(gmt_format_string, time.gmtime(expires)) 120 | response.set_header('Expires', expires) 121 | if seconds: 122 | pragma = 'public' 123 | else: 124 | pragma = 'no-cache, must-revalidate' 125 | response.set_header('Cache-Control', "%s, max-age=%s" % (pragma, seconds)) 126 | response.set_header('Pragma', pragma) 127 | return callback(*args, **kwargs) 128 | return wrapper 129 | return decorator 130 | 131 | 132 | def profile(filename=None): 133 | """Profiling decorator for functions taking one or more arguments""" 134 | 135 | def decorator(callback): 136 | @functools.wraps(callback) 137 | def wrapper(*args, **kwargs): 138 | import cProfile 139 | import logging 140 | log.info('Profiling %s' % (callback.__name__)) 141 | try: 142 | profiler = cProfile.Profile() 143 | res = profiler.runcall(callback, *args, **kwargs) 144 | profiler.dump_stats(filename or '%s_fn.profile' % (callback.__name__)) 145 | except IOError: 146 | log.exception(_("Could not open profile '%(filename)s'") % {"filename": filename}) 147 | return res 148 | return wrapper 149 | return decorator 150 | 151 | 152 | def timed(callback): 153 | """Decorator for timing route processing""" 154 | 155 | @functools.wraps(callback) 156 | def wrapper(*args, **kwargs): 157 | start = time.time() 158 | body = callback(*args, **kwargs) 159 | end = time.time() 160 | response.set_header('X-Processing-Time', str(end - start)) 161 | return body 162 | return wrapper 163 | 164 | 165 | def jsonp(callback): 166 | """Decorator for JSONP handling""" 167 | 168 | @functools.wraps(callback) 169 | def wrapper(*args, **kwargs): 170 | body = callback(*args, **kwargs) 171 | try: 172 | body = json.dumps(body, cls=CustomEncoder) 173 | # Set content type only if serialization successful 174 | response.content_type = 'application/json' 175 | 
except Exception, e: 176 | return body 177 | 178 | callback_function = request.query.get('callback') 179 | if callback_function: 180 | body = ''.join([callback_function, '(', body, ')']) 181 | response.content_type = 'text/javascript' 182 | 183 | response.set_header('Last-Modified', time.strftime(gmt_format_string, time.gmtime())) 184 | response.set_header('ETag', binascii.b2a_base64(hashlib.sha1(body).digest()).strip()) 185 | response.set_header('Content-Length', len(body)) 186 | return body 187 | return wrapper 188 | 189 | 190 | def memoize(f): 191 | """Memoization decorator for functions taking one or more arguments""" 192 | 193 | class memodict(dict): 194 | def __init__(self, f): 195 | self.f = f 196 | 197 | def __call__(self, *args): 198 | return self[args] 199 | 200 | def __missing__(self, key): 201 | res = self[key] = self.f(*key) 202 | return res 203 | 204 | def __repr__(self): 205 | return self.f.__doc__ 206 | 207 | def __get__(self, obj, objtype): 208 | return functools.partial(self.__call__, obj) 209 | return memodict(f) 210 | 211 | 212 | def lru_cache(limit=100): 213 | """Least-recently-used cache decorator""" 214 | 215 | def inner_function(callback): 216 | cache = collections.OrderedDict() 217 | 218 | @functools.wraps(callback) 219 | def wrapper(*args, **kwargs): 220 | key = args 221 | if kwargs: 222 | key += tuple(sorted(kwargs.items())) 223 | try: 224 | result = cache.pop(key) 225 | except KeyError: 226 | result = callback(*args, **kwargs) 227 | if len(cache) >= limit: 228 | cache.popitem(0) 229 | cache[key] = result # refresh position 230 | return result 231 | return wrapper 232 | return inner_function 233 | -------------------------------------------------------------------------------- /urlkit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Copyright (c) 2013, Rui Carmo 6 | Description: Utility functions for retrieving CPU statistics 7 | License: MIT (see LICENSE.md for details) 8 | """ 9 | 10 | import logging 11 | import os 12 | import sys 13 | 14 | log = logging.getLogger() 15 | 16 | import re 17 | import gzip 18 | import base64 19 | import tempfile 20 | import urllib 21 | import urllib2 22 | import urlparse 23 | from StringIO import StringIO 24 | from xml.dom.minidom import parseString 25 | from urllib2 import HTTPCookieProcessor, HTTPRedirectHandler, HTTPDefaultErrorHandler, HTTPError 26 | import cookielib 27 | from collections import defaultdict 28 | from utils.core import tb 29 | from config import settings 30 | from utils.decorators import memoize 31 | from datetime import datetime 32 | 33 | # Initialize debug level upon module load 34 | #httplib.HTTPConnection.debuglevel = settings.httplib.debuglevel 35 | 36 | @memoize 37 | def shorten(url): 38 | """Minimalist URL shortener using SAPO services""" 39 | u = '?'.join(('http://services.sapo.pt/PunyURL/GetCompressedURLByURL', urllib.urlencode({'url':url}))) 40 | try: 41 | x = parseString(fetch(u)['data']) 42 | return x.getElementsByTagName('ascii')[0].firstChild.data 43 | except: 44 | return url 45 | 46 | 47 | @memoize 48 | def agnostic_shortener(url): 49 | """A more flexible URL shortener""" 50 | 51 | services = { 52 | 'tinyurl.com':'/api-create.php?url=', 53 | 'is.gd' :'/api.php?longurl=', 54 | #'api.bit.ly':"http://api.bit.ly/shorten?version=2.0.1&%s&format=text&longUrl=" % BITLY_AUTH, 55 | 'api.tr.im' :'/api/trim_simple?url=' 56 | } 57 | 58 | for shortener in self.services.keys(): 59 | try: 60 | res = 
fetch(self.services[shortener] + urllib.quote(url)) 61 | shorturl = res['data'].strip() 62 | if ("Error" not in shorturl) and ("http://" + urlparse.urlparse(shortener)[1] in shorturl): 63 | return shorturl 64 | else: 65 | continue 66 | except: 67 | log.warn("%s: %s" % (tb(),url)) 68 | pass 69 | return url 70 | 71 | 72 | def expand(url, remove_junk = True, timeout = None): 73 | """Resolve short URLs""" 74 | url = unicode(url) 75 | result = url 76 | 77 | #log.debug(u"%s -> ?" % url) 78 | 79 | (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) 80 | 81 | if scheme not in ['http','https']: 82 | return result 83 | 84 | # time sinks that aren't worth expanding further 85 | if re.match( "(" + ")|(".join([i.replace('.','\.').replace('*','.+') for i in settings.expander.ignore]) + ")", netloc): 86 | return result 87 | 88 | res = {} 89 | user_agents = defaultdict(lambda: settings.fetcher.user_agent) 90 | user_agents.update(settings.expander.user_agents) 91 | user_agent = user_agents[netloc] 92 | 93 | try: 94 | res = fetch(url, head=True, timeout=timeout, user_agent=user_agent) 95 | except: 96 | #log.debug(u"%s: %s" % (tb(),url)) 97 | pass 98 | 99 | if 'url' in res: 100 | (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(res['url']) 101 | if scheme not in ['http','https']: 102 | return result 103 | else: 104 | result = res['url'] 105 | 106 | if remove_junk: 107 | result = scrub_query(result) 108 | #log.debug(u"%s -> %s" % (url,result)) 109 | if fragment: 110 | return "%s#%s" % (result, fragment) 111 | else: 112 | return result 113 | 114 | 115 | def scrub_query(url): 116 | """Clean query arguments""" 117 | 118 | scrub = ["utm_source","utm_campaign","utm_medium","piwik_campaign","piwik_kwd"] 119 | 120 | url = urlparse.urldefrag(url)[0] 121 | base, sep, query = url.partition('?') 122 | seen = set() 123 | result = [] 124 | for field in query.split('&'): 125 | name, sep, value = field.partition('=') 126 | if name in seen: 127 | continue 128 | elif name in scrub: 129 | continue 130 | else: 131 | result.append(field) 132 | seen.add(name) 133 | result = '?'.join([base, sep.join(result)]) if result else base 134 | # strip dangling '?' 
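    # (a lone '?' survives when the URL had no query fields at all,
    #  since ''.split('&') still yields one empty field)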
135 | if result[-1:] == '?': 136 | result = result[:-1] 137 | return result 138 | 139 | 140 | def data_uri(content_type, data): 141 | """Return data as a data: URI scheme""" 142 | return "data:%s;base64,%s" % (content_type, base64.urlsafe_b64encode(data)) 143 | 144 | 145 | class SmartRedirectHandler(HTTPRedirectHandler): 146 | 147 | def http_error_302(self, req, fp, code, msg, headers): 148 | result = HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers) 149 | result.status = code 150 | #log.debug("%d %s" % (code, req.get_full_url())) 151 | return result 152 | 153 | http_error_301 = http_error_303 = http_error_307 = http_error_302 154 | 155 | 156 | class DefaultErrorHandler(HTTPDefaultErrorHandler): 157 | 158 | def http_error_default(self, req, fp, code, msg, headers): 159 | result = HTTPError(req.get_full_url(), code, msg, headers, fp) 160 | result.status = code 161 | return result 162 | 163 | 164 | def _open_source(source, head, data = None, etag = None, last_modified = None, timeout = None, user_agent = "Mozilla/5.0"): 165 | """Open anything""" 166 | 167 | if hasattr(source, 'read'): 168 | return source 169 | if source == '-': 170 | return sys.stdin 171 | 172 | if urlparse.urlparse(source)[0][:4] == 'http': 173 | request = urllib2.Request(source, data) 174 | if head and not data: 175 | request.get_method = lambda: 'HEAD' 176 | request.add_header('User-Agent', user_agent) 177 | if etag: 178 | request.add_header('If-None-Match', etag) 179 | if last_modified: 180 | request.add_header('If-Modified-Since', last_modified) 181 | request.add_header('Accept-encoding', 'gzip') 182 | jar = cookielib.MozillaCookieJar() 183 | jar.set_policy(cookielib.DefaultCookiePolicy(rfc2965=True, strict_rfc2965_unverifiable=False)) 184 | opener = urllib2.build_opener(SmartRedirectHandler(), HTTPCookieProcessor(jar), DefaultErrorHandler()) 185 | return opener.open(request, None, timeout) 186 | try: 187 | return open(source) 188 | except(IOError,OSError): 189 | pass 190 | return StringIO(str(source)) 191 | 192 | 193 | def fetch(url, data = None, etag = None, last_modified = None, head = False, timeout = None, user_agent = "Mozilla/5.0"): 194 | """Fetch a URL and return the contents""" 195 | 196 | result = {} 197 | f = _open_source(url, head, data, etag, last_modified, timeout, user_agent) 198 | if not head: 199 | result['data'] = f.read() 200 | if hasattr(f, 'headers'): 201 | result.update({k.lower(): f.headers.get(k) for k in f.headers}) 202 | if f.headers.get('content-encoding', '') == 'gzip' and not head: 203 | result['data'] = gzip.GzipFile(fileobj=StringIO(result['data'])).read() 204 | if hasattr(f.headers, 'last-modified'): 205 | try: 206 | result['modified_parsed'] = datetime.strptime(f.headers['last-modified'], "%a, %d %b %Y %H:%M:%S %Z") 207 | except Exception, e: 208 | log.debug("Could not parse Last-Modified header '%s'" % f.headers['last-modified']) 209 | pass 210 | if hasattr(f, 'url'): 211 | result['url'] = unicode(f.url) 212 | result['status'] = 200 213 | if hasattr(f, 'status'): 214 | result['status'] = f.status 215 | f.close() 216 | return result 217 | 218 | 219 | def download(url, filename=None, suffix='', user_agent = "Mozilla/5.0"): 220 | """Convenience function for downloading a URL directly to the filesystem""" 221 | 222 | opener = urllib.FancyURLopener({}) 223 | opener.version = user_agent 224 | 225 | if not filename: 226 | fd, filename = tempfile.mkstemp(suffix) 227 | os.close(fd) 228 | 229 | try: 230 | opener.retrieve(url, filename) 231 | return filename 232 | except 
Exception as e: 233 | log.error("Could not download %(url)s: %(e)s" % locals()) 234 | return None 235 | 236 | --------------------------------------------------------------------------------
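As a closing usage note for urlkit.py, a minimal sketch of the fetch() helper (the URL is illustrative; expand() is not exercised here because it additionally expects a config module exposing settings.fetcher and settings.expander):

    from utils.urlkit import fetch

    res = fetch('http://example.com/', timeout=5)
    print res['status'], res.get('content-type')
    page = res['data']        # gzip-encoded responses are decompressed transparently

    head = fetch('http://example.com/', head=True)   # HEAD request: headers only, no 'data' key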