├── .DS_Store ├── .gitignore ├── app.yaml ├── bs4 ├── __init__.py ├── builder │ ├── __init__.py │ ├── _html5lib.py │ ├── _htmlparser.py │ └── _lxml.py ├── dammit.py ├── diagnose.py ├── element.py ├── testing.py └── tests │ ├── __init__.py │ ├── test_builder_registry.py │ ├── test_docs.py │ ├── test_html5lib.py │ ├── test_htmlparser.py │ ├── test_lxml.py │ ├── test_soup.py │ └── test_tree.py ├── cassis.py ├── dateutil ├── __init__.py ├── easter.py ├── parser.py ├── relativedelta.py ├── rrule.py ├── tz.py ├── tzwin.py └── zoneinfo │ ├── __init__.py │ └── zoneinfo-2010g.tar.gz ├── favicon.ico ├── feedparser.py ├── hfeed.html ├── hfeedello.html ├── hfeedmasto.html ├── hovercard.html ├── hovercard2.html ├── hovertest.html ├── html5lib ├── .DS_Store ├── __init__.py ├── constants.py ├── filters │ ├── __init__.py │ ├── _base.py │ ├── alphabeticalattributes.py │ ├── inject_meta_charset.py │ ├── lint.py │ ├── optionaltags.py │ ├── sanitizer.py │ └── whitespace.py ├── html5parser.py ├── ihatexml.py ├── inputstream.py ├── sanitizer.py ├── serializer │ ├── __init__.py │ └── htmlserializer.py ├── tokenizer.py ├── treeadapters │ ├── __init__.py │ └── sax.py ├── treebuilders │ ├── __init__.py │ ├── _base.py │ ├── dom.py │ ├── etree.py │ └── etree_lxml.py ├── treewalkers │ ├── __init__.py │ ├── _base.py │ ├── dom.py │ ├── etree.py │ ├── genshistream.py │ ├── lxmletree.py │ └── pulldom.py ├── trie │ ├── __init__.py │ ├── _base.py │ ├── datrie.py │ └── py.py └── utils.py ├── humanize ├── __init__.py ├── compat.py ├── filesize.py ├── i18n.py ├── locale │ ├── fr_FR │ │ └── LC_MESSAGES │ │ │ └── humanize.po │ ├── ko_KR │ │ └── LC_MESSAGES │ │ │ └── humanize.po │ └── ru_RU │ │ └── LC_MESSAGES │ │ └── humanize.po ├── number.py └── time.py ├── index.html ├── indiecard.html ├── instancelist.html ├── instances.json ├── joyline.svg ├── markedup.html ├── mf2py ├── .DS_Store ├── __init__.py ├── backcompat-rules │ ├── adr.json │ ├── geo.json │ ├── hentry.json │ ├── hfeed.json │ ├── hproduct.json 
│ ├── hrecipe.json │ ├── hresume.json │ ├── hreview-aggregate.json │ ├── hreview.json │ ├── recipe-main-info.json │ ├── vcard.json │ └── vevent.json ├── backcompat.py ├── datetime_helpers.py ├── dom_helpers.py ├── implied_properties.py ├── mf2_classes.py ├── mf_helpers.py ├── parse_property.py ├── parser.py ├── temp_fixes.py ├── value_class_pattern.py └── version.py ├── mf2tojf2.py ├── oembedcard.html ├── openanything.py ├── queue.yaml ├── requests ├── .DS_Store ├── __init__.py ├── adapters.py ├── api.py ├── auth.py ├── cacert.pem ├── certs.py ├── compat.py ├── cookies.py ├── exceptions.py ├── hooks.py ├── models.py ├── packages │ ├── __init__.py │ ├── chardet │ │ ├── __init__.py │ │ ├── big5freq.py │ │ ├── big5prober.py │ │ ├── chardetect.py │ │ ├── chardistribution.py │ │ ├── charsetgroupprober.py │ │ ├── charsetprober.py │ │ ├── codingstatemachine.py │ │ ├── compat.py │ │ ├── constants.py │ │ ├── cp949prober.py │ │ ├── escprober.py │ │ ├── escsm.py │ │ ├── eucjpprober.py │ │ ├── euckrfreq.py │ │ ├── euckrprober.py │ │ ├── euctwfreq.py │ │ ├── euctwprober.py │ │ ├── gb2312freq.py │ │ ├── gb2312prober.py │ │ ├── hebrewprober.py │ │ ├── jisfreq.py │ │ ├── jpcntx.py │ │ ├── langbulgarianmodel.py │ │ ├── langcyrillicmodel.py │ │ ├── langgreekmodel.py │ │ ├── langhebrewmodel.py │ │ ├── langhungarianmodel.py │ │ ├── langthaimodel.py │ │ ├── latin1prober.py │ │ ├── mbcharsetprober.py │ │ ├── mbcsgroupprober.py │ │ ├── mbcssm.py │ │ ├── sbcharsetprober.py │ │ ├── sbcsgroupprober.py │ │ ├── sjisprober.py │ │ ├── universaldetector.py │ │ └── utf8prober.py │ └── urllib3 │ │ ├── __init__.py │ │ ├── _collections.py │ │ ├── connection.py │ │ ├── connectionpool.py │ │ ├── contrib │ │ ├── __init__.py │ │ ├── ntlmpool.py │ │ └── pyopenssl.py │ │ ├── exceptions.py │ │ ├── fields.py │ │ ├── filepost.py │ │ ├── packages │ │ ├── __init__.py │ │ ├── ordered_dict.py │ │ ├── six.py │ │ └── ssl_match_hostname │ │ │ ├── __init__.py │ │ │ └── _implementation.py │ │ ├── poolmanager.py │ │ 
├── request.py │ │ ├── response.py │ │ └── util │ │ ├── __init__.py │ │ ├── connection.py │ │ ├── request.py │ │ ├── response.py │ │ ├── retry.py │ │ ├── ssl_.py │ │ ├── timeout.py │ │ └── url.py ├── sessions.py ├── status_codes.py ├── structures.py └── utils.py ├── shrunkeninline.html ├── shrunkensite.html ├── six.py ├── sparkline.html ├── static ├── awesomplete.min.js ├── fragmention.js ├── index.html ├── landscape.jpg └── landscape2.jpg ├── storycard.html ├── styles ├── awesomplete.css ├── hfeed.css ├── hovercard.css ├── hovercard2.css ├── indiecard.css ├── mastoview.css └── storycard.css ├── unmung.py ├── vrcard.html ├── xoxo.py └── xoxopodcast.xml /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | application: unmung2 2 | version: 2 3 | runtime: python27 4 | api_version: 1 5 | threadsafe: true 6 | 7 | handlers: 8 | - url: /styles 9 | static_dir: styles 10 | - url: /images 11 | static_dir: images 12 | - url: /static 13 | static_dir: static 14 | - url: /.* 15 | script: unmung.application 16 | 17 | # [START libraries] 18 | libraries: 19 | - name: webapp2 20 | version: latest 21 | - name: jinja2 22 | version: latest 23 | - name: ssl 24 | version: latest 25 | - name: lxml 26 | version: latest 27 | # [END libraries] -------------------------------------------------------------------------------- /bs4/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "The beautifulsoup tests." 
2 | -------------------------------------------------------------------------------- /bs4/tests/test_docs.py: -------------------------------------------------------------------------------- 1 | "Test harness for doctests." 2 | 3 | # pylint: disable-msg=E0611,W0142 4 | 5 | __metaclass__ = type 6 | __all__ = [ 7 | 'additional_tests', 8 | ] 9 | 10 | import atexit 11 | import doctest 12 | import os 13 | #from pkg_resources import ( 14 | # resource_filename, resource_exists, resource_listdir, cleanup_resources) 15 | import unittest 16 | 17 | DOCTEST_FLAGS = ( 18 | doctest.ELLIPSIS | 19 | doctest.NORMALIZE_WHITESPACE | 20 | doctest.REPORT_NDIFF) 21 | 22 | 23 | # def additional_tests(): 24 | # "Run the doc tests (README.txt and docs/*, if any exist)" 25 | # doctest_files = [ 26 | # os.path.abspath(resource_filename('bs4', 'README.txt'))] 27 | # if resource_exists('bs4', 'docs'): 28 | # for name in resource_listdir('bs4', 'docs'): 29 | # if name.endswith('.txt'): 30 | # doctest_files.append( 31 | # os.path.abspath( 32 | # resource_filename('bs4', 'docs/%s' % name))) 33 | # kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) 34 | # atexit.register(cleanup_resources) 35 | # return unittest.TestSuite(( 36 | # doctest.DocFileSuite(*doctest_files, **kwargs))) 37 | -------------------------------------------------------------------------------- /bs4/tests/test_htmlparser.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the html.parser tree builder generates good 2 | trees.""" 3 | 4 | from pdb import set_trace 5 | import pickle 6 | from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest 7 | from bs4.builder import HTMLParserTreeBuilder 8 | from bs4.builder._htmlparser import BeautifulSoupHTMLParser 9 | 10 | class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 11 | 12 | @property 13 | def default_builder(self): 14 | return HTMLParserTreeBuilder() 15 | 16 | def 
test_namespaced_system_doctype(self): 17 | # html.parser can't handle namespaced doctypes, so skip this one. 18 | pass 19 | 20 | def test_namespaced_public_doctype(self): 21 | # html.parser can't handle namespaced doctypes, so skip this one. 22 | pass 23 | 24 | def test_builder_is_pickled(self): 25 | """Unlike most tree builders, HTMLParserTreeBuilder and will 26 | be restored after pickling. 27 | """ 28 | tree = self.soup("foo") 29 | dumped = pickle.dumps(tree, 2) 30 | loaded = pickle.loads(dumped) 31 | self.assertTrue(isinstance(loaded.builder, type(tree.builder))) 32 | 33 | def test_redundant_empty_element_closing_tags(self): 34 | self.assertSoupEquals('





', "


") 35 | self.assertSoupEquals('


', "") 36 | 37 | def test_empty_element(self): 38 | # This verifies that any buffered data present when the parser 39 | # finishes working is handled. 40 | self.assertSoupEquals("foo &# bar", "foo &# bar") 41 | 42 | 43 | class TestHTMLParserSubclass(SoupTest): 44 | def test_error(self): 45 | """Verify that our HTMLParser subclass implements error() in a way 46 | that doesn't cause a crash. 47 | """ 48 | parser = BeautifulSoupHTMLParser() 49 | parser.error("don't crash") 50 | -------------------------------------------------------------------------------- /bs4/tests/test_lxml.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the lxml tree builder generates good trees.""" 2 | 3 | import re 4 | import warnings 5 | 6 | try: 7 | import lxml.etree 8 | LXML_PRESENT = True 9 | LXML_VERSION = lxml.etree.LXML_VERSION 10 | except ImportError, e: 11 | LXML_PRESENT = False 12 | LXML_VERSION = (0,) 13 | 14 | if LXML_PRESENT: 15 | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML 16 | 17 | from bs4 import ( 18 | BeautifulSoup, 19 | BeautifulStoneSoup, 20 | ) 21 | from bs4.element import Comment, Doctype, SoupStrainer 22 | from bs4.testing import skipIf 23 | from bs4.tests import test_htmlparser 24 | from bs4.testing import ( 25 | HTMLTreeBuilderSmokeTest, 26 | XMLTreeBuilderSmokeTest, 27 | SoupTest, 28 | skipIf, 29 | ) 30 | 31 | @skipIf( 32 | not LXML_PRESENT, 33 | "lxml seems not to be present, not testing its tree builder.") 34 | class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 35 | """See ``HTMLTreeBuilderSmokeTest``.""" 36 | 37 | @property 38 | def default_builder(self): 39 | return LXMLTreeBuilder() 40 | 41 | def test_out_of_range_entity(self): 42 | self.assertSoupEquals( 43 | "

foo�bar

", "

foobar

") 44 | self.assertSoupEquals( 45 | "

foo�bar

", "

foobar

") 46 | self.assertSoupEquals( 47 | "

foo�bar

", "

foobar

") 48 | 49 | def test_entities_in_foreign_document_encoding(self): 50 | # We can't implement this case correctly because by the time we 51 | # hear about markup like "“", it's been (incorrectly) converted into 52 | # a string like u'\x93' 53 | pass 54 | 55 | # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this 56 | # test if an old version of lxml is installed. 57 | 58 | @skipIf( 59 | not LXML_PRESENT or LXML_VERSION < (2,3,5,0), 60 | "Skipping doctype test for old version of lxml to avoid segfault.") 61 | def test_empty_doctype(self): 62 | soup = self.soup("") 63 | doctype = soup.contents[0] 64 | self.assertEqual("", doctype.strip()) 65 | 66 | def test_beautifulstonesoup_is_xml_parser(self): 67 | # Make sure that the deprecated BSS class uses an xml builder 68 | # if one is installed. 69 | with warnings.catch_warnings(record=True) as w: 70 | soup = BeautifulStoneSoup("") 71 | self.assertEqual(u"", unicode(soup.b)) 72 | self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) 73 | 74 | @skipIf( 75 | not LXML_PRESENT, 76 | "lxml seems not to be present, not testing its XML tree builder.") 77 | class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): 78 | """See ``HTMLTreeBuilderSmokeTest``.""" 79 | 80 | @property 81 | def default_builder(self): 82 | return LXMLTreeBuilderForXML() 83 | -------------------------------------------------------------------------------- /dateutil/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | __version__ = "1.5" 10 | -------------------------------------------------------------------------------- /dateutil/easter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | 12 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"] 13 | 14 | EASTER_JULIAN = 1 15 | EASTER_ORTHODOX = 2 16 | EASTER_WESTERN = 3 17 | 18 | def easter(year, method=EASTER_WESTERN): 19 | """ 20 | This method was ported from the work done by GM Arts, 21 | on top of the algorithm by Claus Tondering, which was 22 | based in part on the algorithm of Ouding (1940), as 23 | quoted in "Explanatory Supplement to the Astronomical 24 | Almanac", P. Kenneth Seidelmann, editor. 25 | 26 | This algorithm implements three different easter 27 | calculation methods: 28 | 29 | 1 - Original calculation in Julian calendar, valid in 30 | dates after 326 AD 31 | 2 - Original method, with date converted to Gregorian 32 | calendar, valid in years 1583 to 4099 33 | 3 - Revised method, in Gregorian calendar, valid in 34 | years 1583 to 4099 as well 35 | 36 | These methods are represented by the constants: 37 | 38 | EASTER_JULIAN = 1 39 | EASTER_ORTHODOX = 2 40 | EASTER_WESTERN = 3 41 | 42 | The default method is method 3. 
43 | 44 | More about the algorithm may be found at: 45 | 46 | http://users.chariot.net.au/~gmarts/eastalg.htm 47 | 48 | and 49 | 50 | http://www.tondering.dk/claus/calendar.html 51 | 52 | """ 53 | 54 | if not (1 <= method <= 3): 55 | raise ValueError, "invalid method" 56 | 57 | # g - Golden year - 1 58 | # c - Century 59 | # h - (23 - Epact) mod 30 60 | # i - Number of days from March 21 to Paschal Full Moon 61 | # j - Weekday for PFM (0=Sunday, etc) 62 | # p - Number of days from March 21 to Sunday on or before PFM 63 | # (-6 to 28 methods 1 & 3, to 56 for method 2) 64 | # e - Extra days to add for method 2 (converting Julian 65 | # date to Gregorian date) 66 | 67 | y = year 68 | g = y % 19 69 | e = 0 70 | if method < 3: 71 | # Old method 72 | i = (19*g+15)%30 73 | j = (y+y//4+i)%7 74 | if method == 2: 75 | # Extra dates to convert Julian to Gregorian date 76 | e = 10 77 | if y > 1600: 78 | e = e+y//100-16-(y//100-16)//4 79 | else: 80 | # New method 81 | c = y//100 82 | h = (c-c//4-(8*c+13)//25+19*g+15)%30 83 | i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11)) 84 | j = (y+y//4+i+2-c+c//4)%7 85 | 86 | # p can be from -6 to 56 corresponding to dates 22 March to 23 May 87 | # (later dates apply to method 2, although 23 May never actually occurs) 88 | p = i-j+e 89 | d = 1+(p+27+(p+6)//40)%31 90 | m = 3+(p+26)//30 91 | return datetime.date(int(y),int(m),int(d)) 92 | 93 | -------------------------------------------------------------------------------- /dateutil/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/dateutil/parser.py -------------------------------------------------------------------------------- /dateutil/zoneinfo/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2005 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 
5 | datetime module. 6 | """ 7 | from dateutil.tz import tzfile 8 | from tarfile import TarFile 9 | import os 10 | 11 | __author__ = "Gustavo Niemeyer " 12 | __license__ = "PSF License" 13 | 14 | __all__ = ["setcachesize", "gettz", "rebuild"] 15 | 16 | CACHE = [] 17 | CACHESIZE = 10 18 | 19 | class tzfile(tzfile): 20 | def __reduce__(self): 21 | return (gettz, (self._filename,)) 22 | 23 | def getzoneinfofile(): 24 | filenames = os.listdir(os.path.join(os.path.dirname(__file__))) 25 | filenames.sort() 26 | filenames.reverse() 27 | for entry in filenames: 28 | if entry.startswith("zoneinfo") and ".tar." in entry: 29 | return os.path.join(os.path.dirname(__file__), entry) 30 | return None 31 | 32 | ZONEINFOFILE = getzoneinfofile() 33 | 34 | del getzoneinfofile 35 | 36 | def setcachesize(size): 37 | global CACHESIZE, CACHE 38 | CACHESIZE = size 39 | del CACHE[size:] 40 | 41 | def gettz(name): 42 | tzinfo = None 43 | if ZONEINFOFILE: 44 | for cachedname, tzinfo in CACHE: 45 | if cachedname == name: 46 | break 47 | else: 48 | tf = TarFile.open(ZONEINFOFILE) 49 | try: 50 | zonefile = tf.extractfile(name) 51 | except KeyError: 52 | tzinfo = None 53 | else: 54 | tzinfo = tzfile(zonefile) 55 | tf.close() 56 | CACHE.insert(0, (name, tzinfo)) 57 | del CACHE[CACHESIZE:] 58 | return tzinfo 59 | 60 | def rebuild(filename, tag=None, format="gz"): 61 | import tempfile, shutil 62 | tmpdir = tempfile.mkdtemp() 63 | zonedir = os.path.join(tmpdir, "zoneinfo") 64 | moduledir = os.path.dirname(__file__) 65 | if tag: tag = "-"+tag 66 | targetname = "zoneinfo%s.tar.%s" % (tag, format) 67 | try: 68 | tf = TarFile.open(filename) 69 | for name in tf.getnames(): 70 | if not (name.endswith(".sh") or 71 | name.endswith(".tab") or 72 | name == "leapseconds"): 73 | tf.extract(name, tmpdir) 74 | filepath = os.path.join(tmpdir, name) 75 | os.system("zic -d %s %s" % (zonedir, filepath)) 76 | tf.close() 77 | target = os.path.join(moduledir, targetname) 78 | for entry in os.listdir(moduledir): 79 | if 
entry.startswith("zoneinfo") and ".tar." in entry: 80 | os.unlink(os.path.join(moduledir, entry)) 81 | tf = TarFile.open(target, "w:%s" % format) 82 | for entry in os.listdir(zonedir): 83 | entrypath = os.path.join(zonedir, entry) 84 | tf.add(entrypath, entry) 85 | tf.close() 86 | finally: 87 | shutil.rmtree(tmpdir) 88 | -------------------------------------------------------------------------------- /dateutil/zoneinfo/zoneinfo-2010g.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/dateutil/zoneinfo/zoneinfo-2010g.tar.gz -------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/favicon.ico -------------------------------------------------------------------------------- /hfeed.html: -------------------------------------------------------------------------------- 1 | 2 | {{feed.title}} 3 | 4 | 5 | 6 | 7 | 8 | 9 |

{{feed.title}}

10 | {% if feed.subtitle_detail and feed.subtitle_detail.type=="text/plain" %}

{{feed.subtitle}}

11 | {% else %}

{% autoescape false %}{{feed.subtitle}}{% endautoescape %}

12 | {% endif %} 13 | {% if feed.author_detail %} 14 |

15 | {% if feed.image %} 16 | 17 | {% endif %} 18 | {% for link in feed.links %} 19 | {% if link.type.split('/')[0] == 'image' and link.rel=='avatar' %} 20 | 21 | {% endif %} 22 | {% endfor %} 23 | {{feed.author_detail.name}} 24 |

25 | {% endif %} 26 | {% for entry in entries %} 27 |
28 | {% autoescape false %}{{entry.title}}{% endautoescape %} 29 |
30 | {% if entry.summary and entry.content %} 31 | 32 | {% autoescape false %} 33 | {{entry.summary}} 34 | {% endautoescape %} 35 | {% if entry.content[0].value.startswith(entry.summary) %} 36 | {% autoescape false %} 37 | {{entry.content[0].value.split(entry.summary)[1] }} 38 | {% endautoescape %} 39 | {% else %} 40 | {% autoescape false %} 41 | {{entry.content[0].value}} 42 | {% endautoescape %} 43 | {% endif %} 44 | {% elif entry.content %} 45 | {% autoescape false %} 46 | {{entry.content[0].value}} 47 | {% endautoescape %} 48 | {% elif entry.summary %} 49 | 50 | {% autoescape false %} 51 | {{entry.summary}} 52 | {% endautoescape %} 53 | 54 | {% endif %} 55 | 56 | {% for link in entry.links %} 57 | {% if link.type == 'audio/mpeg' or link.type == 'audio/mp3' or (link.href and link.href.endswith(".mp3")) %} 58 |

59 | {% endif %} 60 | {% if link.type == 'video/x-m4v' or link.type == 'video/x-mp4' or link.type == 'video/mp4' %} 61 |

62 | {% endif %} 63 | {% if link.type and link.type.split('/')[0] == 'image' and link.rel not in ['avatar','header'] %} 64 |

65 | {% endif %} 66 | {% endfor %} 67 |

68 | {% if entry.published %} 69 |

70 | {% endif %} 71 | {% if entry.updated %} 72 |

73 | {% endif %} 74 | {% if entry.authors %} 75 |

    by 76 | {% for author in entry.authors %} 77 |
  • {{author.name}} 78 | {% endfor %} 79 |
80 | {% else %} 81 | {% if entry.author_detail %} 82 | {% if entry.author_detail.href %} 83 |

by 84 | {% for link in entry.links %} 85 | {% if link.type.split('/')[0] == 'image' and link.rel=='avatar' %} 86 | 87 | {% endif %} 88 | {% endfor %} 89 | {{entry.author_detail.name}} 90 | {% else %} 91 | {% autoescape false %} 92 | by {{entry.author_detail.name}} 93 | {% endautoescape %} 94 | {% endif %} 95 | {% endif %} 96 | {% endif %} 97 |

98 |
99 | {% endfor %} 100 | {% if raw == 'on' %} 101 | {{feeds}} 102 |

{{feed}} 103 |

104 | {{entries}} 105 | 106 | {% endif %} 107 | 108 | -------------------------------------------------------------------------------- /hfeedello.html: -------------------------------------------------------------------------------- 1 | 2 | unmunged ello for {{feed.name}} 3 | 4 | 5 | 6 | 7 | 8 | 9 |

{{feed.name}}

10 |

11 | 12 |

{{feed.name}}

13 |
    14 | {% for entry in feed %} 15 |
    16 | {% for bodypart in entry.body %} 17 | {% if bodypart.kind=="text" %} 18 |
    19 | {% autoescape false %} 20 | {{bodypart.data}} 21 | {% endautoescape %} 22 |
    23 | {% endif %} 24 | {% if bodypart.kind=="image" %} 25 |
    26 | {{bodypart.data.alt}} 27 |
    28 | {% endif %} 29 | {% endfor %} 30 | {% if entry.created_at %} 31 |

    32 | {% endif %} 33 |

    34 |
    35 | {% endfor %} 36 | {% if raw == 'on' %} 37 | {{feeds}} 38 |

    {{feed}} 39 |

    40 | {{entries}} 41 | 42 | {% endif %} 43 | 44 | -------------------------------------------------------------------------------- /hfeedmasto.html: -------------------------------------------------------------------------------- 1 | 2 | {{feed.title}} 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |

    12 |
    13 | 14 | 15 | {% for instance in instances %} 16 | 17 | {% endfor %} 18 | 19 | 23 | 24 |
    25 | 26 |

    {{feed.name}}

    27 |
    28 |

    {{error}}

    29 |
    30 | {% for entry in entries %} 31 |
    32 |
    33 | 34 | {{entry.account.display_name or entry.account.username}} @{% autoescape false %}{{entry.account.acct}}{% endautoescape %} 35 |
    36 | {% if entry.spoiler_text!="" %} 37 |
    38 | {% autoescape false %}{{entry.spoiler_text}}{% endautoescape %} Show More 39 | {% endif %} 40 | 41 |
    42 | {% if entry.content %} 43 | {% autoescape false %} 44 | {{entry.content}} 45 | {% endautoescape %} 46 | {% endif %} 47 |
    48 | {% if entry.nsfw_hide_media and entry.media_attachments %} 49 |
    Show NSFW media 50 | {% endif %} 51 | {% for media in entry.media_attachments %} 52 |
    53 | {% endfor %} 54 | {% if entry.nsfw_hide_media %} 55 |
    56 | {% endif %} 57 | {% if entry.spoiler_text!="" %} 58 |
    59 | {% endif %} 60 | 61 |
    62 | {% endfor %} 63 |
    64 | 65 | -------------------------------------------------------------------------------- /hovercard.html: -------------------------------------------------------------------------------- 1 | 2 | {{" ".join(item.properties.name)}} 3 | 4 | 5 | 6 | 7 | 8 |
    9 |

    {% if item.properties.photo %} 10 | 11 | {% endif %} 12 | 13 | {{" ".join(item.properties.name)}} 14 |

    15 | {% if item.properties.summary %} 16 |

    {{" ".join(item.properties.summary)}}

    17 | {% elif item.properties.note %} 18 | {% if item.properties.note[0].html %} 19 | {% autoescape false %} 20 |

    {{item.properties.note[0].html}}

    21 | {% endautoescape %} 22 | {% else %} 23 |

    {{" ".join(item.properties.note)}}

    24 | {% endif %} 25 | {% endif %} 26 |
    27 | 28 | -------------------------------------------------------------------------------- /hovercard2.html: -------------------------------------------------------------------------------- 1 | 2 | {{name}} 3 | 4 | 5 | 6 | 7 | 8 |
    9 | {% if banner %} 10 | 13 | {% endif %} 14 |

    {% if photo %} 15 | 16 | {% endif %} 17 | 18 | {{name}} 19 |

    20 | {% if summary %} 21 | {% autoescape false %} 22 |

    {{summary}}

    23 | {% endautoescape %} 24 | {% endif %} 25 | {% if org %} 26 | {% autoescape false %} 27 |

    {{org}}

    28 | {% endautoescape %} 29 | {% endif %} 30 | 31 | {% if entries %} 32 |
    33 | {% for entry in entries %} 34 |

    {{entry.name}}

    35 | {% if entry.featured %} 36 | 39 | {% elif entry.photo %} 40 |
    41 | {% endif %} 42 |

    {{entry.summary}}

    43 |
    44 | {% endfor %} 45 |
    46 | {% endif %} 47 |
    48 | 49 | -------------------------------------------------------------------------------- /hovertest.html: -------------------------------------------------------------------------------- 1 | 2 | {% autoescape true %} 3 | 4 | hovercard test 5 | 6 | 7 | 8 | 9 | 10 | 13 | 14 | 15 | {% for url in urls %} 16 | 17 | {% endfor %} 18 | 19 | 20 | {% endautoescape %} -------------------------------------------------------------------------------- /html5lib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/html5lib/.DS_Store -------------------------------------------------------------------------------- /html5lib/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTML parsing library based on the WHATWG "HTML5" 3 | specification. The parser is designed to be compatible with existing 4 | HTML found in the wild and implements well-defined error recovery that 5 | is largely compatible with modern desktop web browsers. 
6 | 7 | Example usage: 8 | 9 | import html5lib 10 | f = open("my_document.html") 11 | tree = html5lib.parse(f) 12 | """ 13 | 14 | from __future__ import absolute_import, division, unicode_literals 15 | 16 | from .html5parser import HTMLParser, parse, parseFragment 17 | from .treebuilders import getTreeBuilder 18 | from .treewalkers import getTreeWalker 19 | from .serializer import serialize 20 | 21 | __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", 22 | "getTreeWalker", "serialize"] 23 | __version__ = "0.999999-dev" 24 | -------------------------------------------------------------------------------- /html5lib/filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/html5lib/filters/__init__.py -------------------------------------------------------------------------------- /html5lib/filters/_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | 3 | 4 | class Filter(object): 5 | def __init__(self, source): 6 | self.source = source 7 | 8 | def __iter__(self): 9 | return iter(self.source) 10 | 11 | def __getattr__(self, name): 12 | return getattr(self.source, name) 13 | -------------------------------------------------------------------------------- /html5lib/filters/alphabeticalattributes.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | 3 | from . 
import _base 4 | 5 | try: 6 | from collections import OrderedDict 7 | except ImportError: 8 | from ordereddict import OrderedDict 9 | 10 | 11 | class Filter(_base.Filter): 12 | def __iter__(self): 13 | for token in _base.Filter.__iter__(self): 14 | if token["type"] in ("StartTag", "EmptyTag"): 15 | attrs = OrderedDict() 16 | for name, value in sorted(token["data"].items(), 17 | key=lambda x: x[0]): 18 | attrs[name] = value 19 | token["data"] = attrs 20 | yield token 21 | -------------------------------------------------------------------------------- /html5lib/filters/inject_meta_charset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | 3 | from . import _base 4 | 5 | 6 | class Filter(_base.Filter): 7 | def __init__(self, source, encoding): 8 | _base.Filter.__init__(self, source) 9 | self.encoding = encoding 10 | 11 | def __iter__(self): 12 | state = "pre_head" 13 | meta_found = (self.encoding is None) 14 | pending = [] 15 | 16 | for token in _base.Filter.__iter__(self): 17 | type = token["type"] 18 | if type == "StartTag": 19 | if token["name"].lower() == "head": 20 | state = "in_head" 21 | 22 | elif type == "EmptyTag": 23 | if token["name"].lower() == "meta": 24 | # replace charset with actual encoding 25 | has_http_equiv_content_type = False 26 | for (namespace, name), value in token["data"].items(): 27 | if namespace is not None: 28 | continue 29 | elif name.lower() == 'charset': 30 | token["data"][(namespace, name)] = self.encoding 31 | meta_found = True 32 | break 33 | elif name == 'http-equiv' and value.lower() == 'content-type': 34 | has_http_equiv_content_type = True 35 | else: 36 | if has_http_equiv_content_type and (None, "content") in token["data"]: 37 | token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding 38 | meta_found = True 39 | 40 | elif token["name"].lower() == "head" and not meta_found: 41 | # insert meta into empty 
head 42 | yield {"type": "StartTag", "name": "head", 43 | "data": token["data"]} 44 | yield {"type": "EmptyTag", "name": "meta", 45 | "data": {(None, "charset"): self.encoding}} 46 | yield {"type": "EndTag", "name": "head"} 47 | meta_found = True 48 | continue 49 | 50 | elif type == "EndTag": 51 | if token["name"].lower() == "head" and pending: 52 | # insert meta into head (if necessary) and flush pending queue 53 | yield pending.pop(0) 54 | if not meta_found: 55 | yield {"type": "EmptyTag", "name": "meta", 56 | "data": {(None, "charset"): self.encoding}} 57 | while pending: 58 | yield pending.pop(0) 59 | meta_found = True 60 | state = "post_head" 61 | 62 | if state == "in_head": 63 | pending.append(token) 64 | else: 65 | yield token 66 | -------------------------------------------------------------------------------- /html5lib/filters/lint.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | 3 | from . 
class Filter(_base.Filter):
    """Lint filter: validates a treewalker token stream.

    Raises LintError on any malformed token and otherwise yields every
    token through unchanged.
    """

    def __iter__(self):
        open_elements = []            # stack of currently-open StartTag names
        contentModelFlag = "PCDATA"
        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type in ("StartTag", "EmptyTag"):
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name})
                if not isinstance(name, str):
                    raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
                if not name:
                    raise LintError("Empty tag name")
                if token_type == "StartTag" and name in voidElements:
                    raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
                elif token_type == "EmptyTag" and name not in voidElements:
                    raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
                if token_type == "StartTag":
                    open_elements.append(name)
                # BUG FIX: treewalker tokens carry attributes as a dict keyed
                # by (namespace, localname) — see e.g. treewalkers/dom.py in
                # this package.  The old code iterated the dict directly
                # (keys only) and clobbered the tag `name` with the loop
                # variable, so the content-model check below tested the last
                # attribute name instead of the element name.
                for (attr_ns, attr_name), attr_value in token["data"].items():
                    if not isinstance(attr_name, str):
                        raise LintError("Attribute name is not a string: %(name)r" % {"name": attr_name})
                    if not attr_name:
                        raise LintError("Empty attribute name")
                    if not isinstance(attr_value, str):
                        raise LintError("Attribute value is not a string: %(value)r" % {"value": attr_value})
                if name in cdataElements:
                    contentModelFlag = "CDATA"
                elif name in rcdataElements:
                    contentModelFlag = "RCDATA"
                elif name == "plaintext":
                    contentModelFlag = "PLAINTEXT"

            elif token_type == "EndTag":
                name = token["name"]
                if not isinstance(name, str):
                    raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
                if not name:
                    raise LintError("Empty tag name")
                if name in voidElements:
                    raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
                # BUG FIX: popping an empty stack raised IndexError; report
                # the unbalanced stream as a lint failure instead.
                if not open_elements:
                    raise LintError("EndTag (%(end)s) with no corresponding StartTag" % {"end": name})
                start_name = open_elements.pop()
                if start_name != name:
                    raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
                contentModelFlag = "PCDATA"

            elif token_type == "Comment":
                if contentModelFlag != "PCDATA":
                    raise LintError("Comment not in PCDATA content model flag")

            elif token_type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                if not isinstance(data, str):
                    # BUG FIX: old message wrongly said "Attribute name".
                    raise LintError("Character data is not a string: %(data)r" % {"data": data})
                if not data:
                    raise LintError("%(type)s token with empty data" % {"type": token_type})
                if token_type == "SpaceCharacters":
                    data = data.strip(spaceCharacters)
                    if data:
                        raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data})

            elif token_type == "Doctype":
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name})
                if not isinstance(name, str):
                    raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
                # XXX: what to do with token["data"] ?

            elif token_type in ("ParseError", "SerializeError"):
                pass

            else:
                raise LintError("Unknown token type: %(type)s" % {"type": token_type})

            yield token
class Filter(_base.Filter):
    """Collapse runs of whitespace in the token stream to single spaces,
    except inside elements where whitespace is significant (pre, textarea
    and the RCDATA elements)."""

    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

    def __iter__(self):
        depth = 0  # nesting depth within space-preserving elements
        for token in _base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag" \
                    and (depth or token["name"] in self.spacePreserveElements):
                depth += 1

            elif kind == "EndTag" and depth:
                depth -= 1

            elif not depth and kind == "SpaceCharacters" and token["data"]:
                # The truthiness guard on token["data"] avoids introducing
                # a space where there was none.
                token["data"] = " "

            elif not depth and kind == "Characters":
                token["data"] = collapse_spaces(token["data"])

            yield token


def collapse_spaces(text):
    """Replace every run of whitespace characters with a single space."""
    return SPACES_REGEX.sub(' ', text)
# Map each foreign-attribute prefix to its namespace URI (only entries
# that actually carry a prefix).
prefix_mapping = {prefix: namespace
                  for prefix, localName, namespace in adjustForeignAttributes.values()
                  if prefix is not None}


def to_sax(walker, handler):
    """Call SAX-like content handler based on treewalker walker"""
    handler.startDocument()
    for prefix, namespace in prefix_mapping.items():
        handler.startPrefixMapping(prefix, namespace)

    for token in walker:
        kind = token["type"]
        if kind == "Doctype":
            # SAX ContentHandler has no doctype event; drop it.
            continue
        elif kind in ("StartTag", "EmptyTag"):
            qname = (token["namespace"], token["name"])
            attrs = AttributesNSImpl(token["data"],
                                     unadjustForeignAttributes)
            handler.startElementNS(qname, token["name"], attrs)
            if kind == "EmptyTag":
                # Void element: close it immediately.
                handler.endElementNS(qname, token["name"])
        elif kind == "EndTag":
            handler.endElementNS((token["namespace"], token["name"]),
                                 token["name"])
        elif kind in ("Characters", "SpaceCharacters"):
            handler.characters(token["data"])
        elif kind == "Comment":
            pass
        else:
            assert False, "Unknown token type"

    for prefix, namespace in prefix_mapping.items():
        handler.endPrefixMapping(prefix)
    handler.endDocument()
treeBuilderCache = {}


def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive).
               Supported values are:

               "dom"   - A generic builder for DOM implementations,
                         defaulting to a xml.dom.minidom based implementation.
               "etree" - A generic builder for tree implementations exposing
                         an ElementTree-like interface, defaulting to
                         xml.etree.cElementTree if available and
                         xml.etree.ElementTree if not.
               "lxml"  - A etree-based builder for lxml.etree, handling
                         limitations of lxml's implementation.

    implementation - (Currently applies to the "etree" and "dom" tree
               types). A module implementing the tree type e.g.
               xml.etree.ElementTree or xml.etree.cElementTree.

    Raises ValueError for an unrecognised treeType.
    """
    treeType = treeType.lower()
    if treeType in treeBuilderCache:
        return treeBuilderCache[treeType]

    if treeType == "dom":
        from . import dom
        if implementation is None:
            # Come up with a sane default (pref. from the stdlib)
            from xml.dom import minidom
            implementation = minidom
        # NEVER cache here, caching is done in the dom submodule
        return dom.getDomModule(implementation, **kwargs).TreeBuilder
    elif treeType == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeBuilder
    elif treeType == "lxml":
        from . import etree_lxml
        treeBuilderCache[treeType] = etree_lxml.TreeBuilder
        return treeBuilderCache[treeType]
    else:
        raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
class TreeWalker(_base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over xml.dom (minidom-style) node trees."""

    def getNodeDetails(self, node):
        """Classify *node* into a walker detail tuple by its DOM nodeType."""
        node_type = node.nodeType
        if node_type == Node.DOCUMENT_TYPE_NODE:
            return _base.DOCTYPE, node.name, node.publicId, node.systemId

        if node_type in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return _base.TEXT, node.nodeValue

        if node_type == Node.ELEMENT_NODE:
            # Build an attribute dict keyed by (namespaceURI, localName);
            # non-namespaced attributes use (None, name).
            attrs = {}
            for attr_name in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr_name)
                if attr.namespaceURI:
                    attrs[(attr.namespaceURI, attr.localName)] = attr.value
                else:
                    attrs[(None, attr.name)] = attr.value
            return (_base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        if node_type == Node.COMMENT_NODE:
            return _base.COMMENT, node.nodeValue

        if node_type in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (_base.DOCUMENT,)

        return _base.UNKNOWN, node.nodeType

    def getFirstChild(self, node):
        return node.firstChild

    def getNextSibling(self, node):
        return node.nextSibling

    def getParentNode(self, node):
        return node.parentNode
import _base 8 | 9 | from ..constants import voidElements, namespaces 10 | 11 | 12 | class TreeWalker(_base.TreeWalker): 13 | def __iter__(self): 14 | # Buffer the events so we can pass in the following one 15 | previous = None 16 | for event in self.tree: 17 | if previous is not None: 18 | for token in self.tokens(previous, event): 19 | yield token 20 | previous = event 21 | 22 | # Don't forget the final event! 23 | if previous is not None: 24 | for token in self.tokens(previous, None): 25 | yield token 26 | 27 | def tokens(self, event, next): 28 | kind, data, pos = event 29 | if kind == START: 30 | tag, attribs = data 31 | name = tag.localname 32 | namespace = tag.namespace 33 | converted_attribs = {} 34 | for k, v in attribs: 35 | if isinstance(k, QName): 36 | converted_attribs[(k.namespace, k.localname)] = v 37 | else: 38 | converted_attribs[(None, k)] = v 39 | 40 | if namespace == namespaces["html"] and name in voidElements: 41 | for token in self.emptyTag(namespace, name, converted_attribs, 42 | not next or next[0] != END 43 | or next[1] != tag): 44 | yield token 45 | else: 46 | yield self.startTag(namespace, name, converted_attribs) 47 | 48 | elif kind == END: 49 | name = data.localname 50 | namespace = data.namespace 51 | if name not in voidElements: 52 | yield self.endTag(namespace, name) 53 | 54 | elif kind == COMMENT: 55 | yield self.comment(data) 56 | 57 | elif kind == TEXT: 58 | for token in self.text(data): 59 | yield token 60 | 61 | elif kind == DOCTYPE: 62 | yield self.doctype(*data) 63 | 64 | elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, 65 | START_CDATA, END_CDATA, PI): 66 | pass 67 | 68 | else: 69 | yield self.unknown(kind) 70 | -------------------------------------------------------------------------------- /html5lib/treewalkers/pulldom.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | 3 | from xml.dom.pulldom import 
class TreeWalker(_base.TreeWalker):
    """Tree walker that converts an xml.dom.pulldom event stream into
    html5lib tokens, with one event of lookahead.

    `ignore_until` suppresses the (bogus) child events of an element that
    was emitted as an EmptyTag, until its matching END_ELEMENT is seen.
    """

    def __iter__(self):
        ignore_until = None
        previous = None
        for event in self.tree:
            # Emit tokens for the buffered event unless we are skipping
            # the interior of an already-emitted empty element.
            if previous is not None and \
               (ignore_until is None or previous[1] is ignore_until):
                if previous[1] is ignore_until:
                    # Reached the END_ELEMENT of the skipped element.
                    ignore_until = None
                for token in self.tokens(previous, event):
                    yield token
                    if token["type"] == "EmptyTag":
                        # Skip everything until this node's END_ELEMENT.
                        ignore_until = previous[1]
            previous = event
        if ignore_until is None or previous[1] is ignore_until:
            for token in self.tokens(previous, None):
                yield token
        elif ignore_until is not None:
            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

    def tokens(self, event, next):
        # `event` is a pulldom (type, node) pair; `next` is the following
        # event (or None), used to detect childless void elements.
        type, node = event
        if type == START_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            attrs = {}
            for attr in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr)
                attrs[(attr.namespaceURI, attr.localName)] = attr.value
            if name in voidElements:
                # Flag indicates whether the void element wrongly has
                # content (next event is not its own END_ELEMENT).
                for token in self.emptyTag(namespace,
                                           name,
                                           attrs,
                                           not next or next[1] is not node):
                    yield token
            else:
                yield self.startTag(namespace, name, attrs)

        elif type == END_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            # Void elements were already emitted as EmptyTag above.
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif type == COMMENT:
            yield self.comment(node.nodeValue)

        elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
            for token in self.text(node.nodeValue):
                yield token

        else:
            yield self.unknown(type)
class Trie(Mapping):
    """Abstract base class for tries.

    Concrete subclasses provide the Mapping protocol (``__getitem__``,
    ``__iter__``, ``__len__``); the prefix helpers below are generic —
    if slow — default implementations layered on top of it.
    """

    def keys(self, prefix=None):
        """Return the set of keys, optionally restricted to a prefix."""
        all_keys = super().keys()
        if prefix is None:
            return set(all_keys)
        return {key for key in all_keys if key.startswith(prefix)}

    def has_keys_with_prefix(self, prefix):
        """Return True if any stored key starts with *prefix*."""
        return any(key.startswith(prefix) for key in self.keys())

    def longest_prefix(self, prefix):
        """Return the longest stored key that is a prefix of *prefix*.

        Raises KeyError when no stored key (including "") qualifies.
        """
        if prefix in self:
            return prefix
        # Try successively shorter slices, longest first, down to "".
        for end in range(len(prefix) - 1, -1, -1):
            candidate = prefix[:end]
            if candidate in self:
                return candidate
        raise KeyError(prefix)

    def longest_prefix_item(self, prefix):
        """Return (key, value) for the longest stored prefix of *prefix*."""
        longest = self.longest_prefix(prefix)
        return (longest, self[longest])
DATrie("".join(chars)) 19 | for key, value in data.items(): 20 | self._data[key] = value 21 | 22 | def __contains__(self, key): 23 | return key in self._data 24 | 25 | def __len__(self): 26 | return len(self._data) 27 | 28 | def __iter__(self): 29 | raise NotImplementedError() 30 | 31 | def __getitem__(self, key): 32 | return self._data[key] 33 | 34 | def keys(self, prefix=None): 35 | return self._data.keys(prefix) 36 | 37 | def has_keys_with_prefix(self, prefix): 38 | return self._data.has_keys_with_prefix(prefix) 39 | 40 | def longest_prefix(self, prefix): 41 | return self._data.longest_prefix(prefix) 42 | 43 | def longest_prefix_item(self, prefix): 44 | return self._data.longest_prefix_item(prefix) 45 | -------------------------------------------------------------------------------- /html5lib/trie/py.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | from six import text_type 3 | 4 | from bisect import bisect_left 5 | 6 | from ._base import Trie as ABCTrie 7 | 8 | 9 | class Trie(ABCTrie): 10 | def __init__(self, data): 11 | if not all(isinstance(x, text_type) for x in data.keys()): 12 | raise TypeError("All keys must be strings") 13 | 14 | self._data = data 15 | self._keys = sorted(data.keys()) 16 | self._cachestr = "" 17 | self._cachepoints = (0, len(data)) 18 | 19 | def __contains__(self, key): 20 | return key in self._data 21 | 22 | def __len__(self): 23 | return len(self._data) 24 | 25 | def __iter__(self): 26 | return iter(self._data) 27 | 28 | def __getitem__(self, key): 29 | return self._data[key] 30 | 31 | def keys(self, prefix=None): 32 | if prefix is None or prefix == "" or not self._keys: 33 | return set(self._keys) 34 | 35 | if prefix.startswith(self._cachestr): 36 | lo, hi = self._cachepoints 37 | start = i = bisect_left(self._keys, prefix, lo, hi) 38 | else: 39 | start = i = bisect_left(self._keys, prefix) 40 | 41 | keys = set() 42 | if 
start == len(self._keys): 43 | return keys 44 | 45 | while self._keys[i].startswith(prefix): 46 | keys.add(self._keys[i]) 47 | i += 1 48 | 49 | self._cachestr = prefix 50 | self._cachepoints = (start, i) 51 | 52 | return keys 53 | 54 | def has_keys_with_prefix(self, prefix): 55 | if prefix in self._data: 56 | return True 57 | 58 | if prefix.startswith(self._cachestr): 59 | lo, hi = self._cachepoints 60 | i = bisect_left(self._keys, prefix, lo, hi) 61 | else: 62 | i = bisect_left(self._keys, prefix) 63 | 64 | if i == len(self._keys): 65 | return False 66 | 67 | return self._keys[i].startswith(prefix) 68 | -------------------------------------------------------------------------------- /html5lib/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals 2 | 3 | from types import ModuleType 4 | 5 | from six import text_type 6 | 7 | try: 8 | import xml.etree.cElementTree as default_etree 9 | except ImportError: 10 | import xml.etree.ElementTree as default_etree 11 | 12 | 13 | __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", 14 | "surrogatePairToCodepoint", "moduleFactoryFactory", 15 | "supports_lone_surrogates"] 16 | 17 | 18 | # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be 19 | # caught by the below test. In general this would be any platform 20 | # using UTF-16 as its encoding of unicode strings, such as 21 | # Jython. This is because UTF-16 itself is based on the use of such 22 | # surrogates, and there is no mechanism to further escape such 23 | # escapes. 
class MethodDispatcher(dict):
    """Dict with 2 special properties:

    On initiation, keys that are lists, sets or tuples are expanded into
    one entry per member, so looking up any member of the original
    list-like key returns the shared value:

        md = MethodDispatcher({("foo", "bar"):"baz"})
        md["foo"] == "baz"

    Missing keys fall back to the ``default`` attribute (None initially).
    """

    def __init__(self, items=()):
        # Build the flattened entry list first and hand it to
        # dict.__init__ in one call — measurably faster than assigning
        # keys one at a time.  Please benchmark before changing this.
        entries = []
        for key, value in items:
            if type(key) in (list, tuple, frozenset, set):
                entries.extend((member, value) for member in key)
            else:
                entries.append((key, value))
        dict.__init__(self, entries)
        self.default = None

    def __getitem__(self, key):
        # Missing keys return self.default instead of raising.
        return dict.get(self, key, self.default)


# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds

def isSurrogatePair(data):
    """Return True if *data* is a high+low UTF-16 surrogate pair."""
    if len(data) != 2:
        return False
    high, low = ord(data[0]), ord(data[1])
    return 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF


def surrogatePairToCodepoint(data):
    """Combine a UTF-16 surrogate pair into its astral codepoint."""
    high, low = ord(data[0]), ord(data[1])
    return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
def moduleFactoryFactory(factory):
    """Wrap *factory* so that the module it builds for a given base
    module is constructed once and cached.

    ``factory(baseModule, *args, **kwargs)`` must return a dict of names
    to inject into a synthetic module named after the base module.
    """
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        # Module names are text on Python 3 and bytes on Python 2; the
        # isinstance probe picks the matching format string type.
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        try:
            return moduleCache[name]
        except KeyError:
            mod = ModuleType(name)
            mod.__dict__.update(factory(baseModule, *args, **kwargs))
            moduleCache[name] = mod
            return mod

    return moduleFactory
"""Format a number of byteslike a human readable filesize (eg. 10 kB). By 15 | default, decimal suffixes (kB, MB) are used. Passing binary=true will use 16 | binary suffixes (KiB, MiB) are used and the base will be 2**10 instead of 17 | 10**3. If ``gnu`` is True, the binary argument is ignored and GNU-style 18 | (ls -sh style) prefixes are used (K, M) with the 2**10 definition. 19 | Non-gnu modes are compatible with jinja2's ``filesizeformat`` filter.""" 20 | if gnu: suffix = suffixes['gnu'] 21 | elif binary: suffix = suffixes['binary'] 22 | else: suffix = suffixes['decimal'] 23 | 24 | base = 1024 if (gnu or binary) else 1000 25 | bytes = float(value) 26 | 27 | if bytes == 1 and not gnu: return '1 Byte' 28 | elif bytes < base and not gnu: return '%d Bytes' % bytes 29 | elif bytes < base and gnu: return '%dB' % bytes 30 | 31 | for i,s in enumerate(suffix): 32 | unit = base ** (i+2) 33 | if bytes < unit and not gnu: 34 | return (format + ' %s') % ((base * bytes / unit), s) 35 | elif bytes < unit and gnu: 36 | return (format + '%s') % ((base * bytes / unit), s) 37 | if gnu: 38 | return (format + '%s') % ((base * bytes / unit), s) 39 | return (format + ' %s') % ((base * bytes / unit), s) 40 | 41 | -------------------------------------------------------------------------------- /humanize/i18n.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import gettext as gettext_module 3 | from threading import local 4 | import os.path 5 | 6 | __all__ = ['activate', 'deactivate', 'gettext', 'ngettext'] 7 | 8 | _TRANSLATIONS = {None: gettext_module.NullTranslations()} 9 | _CURRENT = local() 10 | 11 | _DEFAULT_LOCALE_PATH = os.path.join(os.path.dirname(__file__), 'locale') 12 | 13 | 14 | def get_translation(): 15 | try: 16 | return _TRANSLATIONS[_CURRENT.locale] 17 | except (AttributeError, KeyError): 18 | return _TRANSLATIONS[None] 19 | 20 | 21 | def activate(locale, path=None): 22 | """Set 'locale' as current 
def pgettext(msgctxt, message):
    """'Particular gettext' function (GNU gettext's ``pgettext``).

    Supports '.po' ``msgctxt`` modifiers, allowing the same msgid to
    carry different translations in different contexts.  Python 2's
    gettext module has no pgettext, so it is reimplemented here: GNU
    gettext stores context-qualified entries keyed as
    msgctxt + '\\x04' (EOT byte) + msgid, so we build that key and look
    it up directly.
    """
    key = msgctxt + '\x04' + message
    # gettext returns the key unchanged when the catalog has no entry;
    # in that case fall back to the untranslated message.
    translation = get_translation().gettext(key)
    return message if translation == key else translation
def intcomma(value):
    """Converts an integer to a string containing commas every three digits.

    For example, 3000 becomes '3,000' and 45000 becomes '45,000'.  To
    maintain some compatability with Django's intcomma, this function
    also accepts floats.  Values that cannot be coerced to a number are
    returned unchanged.
    """
    try:
        if isinstance(value, compat.string_types):
            float(value.replace(',', ''))
        else:
            float(value)
    except (TypeError, ValueError):
        return value
    orig = str(value)
    # Raw strings: "\d" / "\g" are invalid escape sequences in normal
    # string literals and warn on modern Python.  Insert one comma per
    # pass and recurse until no ungrouped run of four digits remains.
    new = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', orig)
    if orig == new:
        return new
    else:
        return intcomma(new)
def fractional(value):
    '''Return a human readable fractional representation of a number.

    There are cases where one might not want to show ugly decimal places
    for floats and decimals.  Pass in a string, int or float and this
    returns a string of a fraction, a whole number, or a mixed fraction:

        fractional(0.3)   returns '3/10'
        fractional(1.3)   returns '1 3/10'
        fractional(1/3)   returns '1/3'
        fractional(1)     returns '1'

    The fractional part is approximated with a denominator of at most
    1000.  Values that cannot be coerced to float are returned
    unchanged; otherwise the result is always a string.
    '''
    try:
        number = float(value)
    except (TypeError, ValueError):
        return value
    whole_number = int(number)
    frac = Fraction(number - whole_number).limit_denominator(1000)
    # Use the public Fraction API, not the private _numerator /
    # _denominator attributes.
    numerator = frac.numerator
    denominator = frac.denominator
    if whole_number and not numerator and denominator == 1:
        # An integer (or an integral float like 1.0000) was passed in.
        return '%.0f' % whole_number
    elif not whole_number:
        return '%.0f/%.0f' % (numerator, denominator)
    else:
        return '%.0f %.0f/%.0f' % (whole_number, numerator, denominator)

    A tool to turn feeds into h-feeds

    13 |
    14 |
    15 |
    16 |
    17 |
    18 |

    A tool to turn any webpage into mf2 json

    19 |
    20 |
    url:
    21 |
    22 |
    23 |
    24 |
    {{mfjson}}
    25 |
    26 |

    A tool to autolink and embed urls into html

    27 |
    28 |
    29 |
    30 |
    31 |
    32 | {% autoescape false %}
    {{ linkedhtml }}
    {% endautoescape %} 33 |
    34 |
    35 |

    A tool to show indiecards

    36 |
    37 |
    38 |
    39 |
    40 |

    A tool to show storycards

    41 |
    42 |
    43 |
    44 |
    45 |

    A tool to show json as html

    46 |
    47 |
    48 |
    49 |
    50 |

    A tool to extract json from html lists

    51 |
    52 |
    53 |
    54 |
    55 |
    56 |

    A tool to make jf2 from mf2

    57 |
    58 |
    59 |
    60 |
    61 |
    62 |

    mastodon preview

    63 |
    64 |
    65 |
    66 |
    67 |
    68 | 69 | 70 | {% endautoescape %} -------------------------------------------------------------------------------- /indiecard.html: -------------------------------------------------------------------------------- 1 | 2 | {% if item %}{{" ".join(item.properties.name)}}{% else %}Indiecard{% endif %} 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    12 | 13 | 14 |
    15 |
    16 |
    17 | {% for item in items %} 18 |
    19 |

    {% if item.properties.photo %} 20 | 21 | {% elif item.properties.featured %} 22 | 23 | {% endif %} 24 | {{" ".join(item.properties.name)}}

    25 | {% if item.properties.author %} 26 | {% if item.properties.author[0].properties %} 27 |

    {% if item.properties.author[0].properties.photo %}{% endif %}{% if item.properties.author[0].properties.url %}{{item.properties.author[0].properties.name[0]}}{% else %}{{item.properties.author[0].properties.name[0]}}{% endif %}

    28 | {% else %} 29 |

    {{item.properties.author[0]}}

    30 | {% endif %} 31 | {% endif %} 32 | {% if item.properties.summary %} 33 |

    {{" ".join(item.properties.summary)}}

    34 | {% endif %} 35 | {% if item.properties.content %} 36 |

    {{" ".join(item.properties.content.html)}}

    37 | {% endif %} 38 | {% if item.properties.ingredient %} 39 |

    Ingredients

    40 |
      41 | {% for food in item.properties.ingredient %}
    • {{food}}{% endfor %} 42 |
    43 | {% endif %} 44 | {% if item.properties.instructions %} 45 |

    Method

    46 | {% if item.properties.instructions|length ==1 %} 47 | {% autoescape false %} 48 |

    {{item.properties.instructions[0]["html"] or item.properties.instructions[0]}}

    49 | {% endautoescape %} 50 | {% else %} 51 |
      52 | {% autoescape false %} 53 | {% for stage in item.properties.instructions %}
    1. {{stage["html"] or stage}}{% endfor %} 54 | {% endautoescape %} 55 |
    56 | {% endif %} 57 | {% endif %} 58 | {% if item.properties.note %} 59 | {% if item.properties.note[0].html %} 60 | {% autoescape false %} 61 |

    {{item.properties.note[0].html}}

    62 | {% endautoescape %} 63 | {% else %} 64 |

    {{" ".join(item.properties.note)}}

    65 | {% endif %} 66 | {% endif %} 67 |
    68 | {% endfor %} 69 |
    70 |
    71 | {%if items %}
    raw{{items}}
    {% endif %} 72 |
    73 | 74 | -------------------------------------------------------------------------------- /instancelist.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
    7 | 8 | 9 | {% for instance in instances %} 10 | 11 | {% endfor %} 12 | 13 | 17 | 18 |
    19 | -------------------------------------------------------------------------------- /joyline.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% for line in lines %} 4 | 5 | {% endfor %} 6 | -------------------------------------------------------------------------------- /markedup.html: -------------------------------------------------------------------------------- 1 | 2 | {{name}} 3 | 4 | 5 | 16 | 17 | 18 |
    19 | <div class=h-card ><a class=u-url rel=me 20 |
    href={{url}} > 21 |
    22 |
    23 |

    <img class=u-photo src= 24 |
    />
    25 |
    {{name}}
    26 |
    27 |
    28 | </a></div> 29 |
30 | 31 | -------------------------------------------------------------------------------- /mf2py/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinmarks/unmung/5b8539971a8e1a64694e65643670936512fc987d/mf2py/.DS_Store -------------------------------------------------------------------------------- /mf2py/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Microformats2 is a general way to mark up any HTML document with 3 | classes and properties. This library parses structured data from 4 | a microformatted HTML document and returns a well-formed JSON 5 | dictionary. 6 | """ 7 | 8 | from .version import __version__ 9 | from .parser import Parser, parse 10 | from .mf_helpers import get_url 11 | 12 | 13 | __all__ = ['Parser', 'parse', 'get_url', '__version__'] 14 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/adr.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-adr" 4 | ], 5 | "properties": { 6 | "locality": [ 7 | "p-locality" 8 | ], 9 | "region": [ 10 | "p-region" 11 | ], 12 | "extended-address": [ 13 | "p-extended-address" 14 | ], 15 | "post-office-box": [ 16 | "p-post-office-box" 17 | ], 18 | "street-address": [ 19 | "p-street-address" 20 | ], 21 | "postal-code": [ 22 | "p-postal-code" 23 | ], 24 | "country-name": [ 25 | "p-country-name" 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /mf2py/backcompat-rules/geo.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-geo" 4 | ], 5 | "properties": { 6 | "latitude": [ 7 | "p-latitude" 8 | ], 9 | "longitude": [ 10 | "p-longitude" 11 | ] 12 | } 13 | } --------------------------------------------------------------------------------
/mf2py/backcompat-rules/hentry.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-entry" 4 | ], 5 | "properties": { 6 | "category": [ 7 | "p-category" 8 | ], 9 | "entry-title": [ 10 | "p-name" 11 | ], 12 | "published": [ 13 | "dt-published" 14 | ], 15 | "latitude": [ 16 | "p-latitude" 17 | ], 18 | "entry-content": [ 19 | "e-content" 20 | ], 21 | "entry-summary": [ 22 | "p-summary" 23 | ], 24 | "author": [ 25 | "p-author", 26 | "h-card" 27 | ], 28 | "geo": [ 29 | "p-geo", 30 | "h-geo" 31 | ], 32 | "updated": [ 33 | "dt-updated" 34 | ], 35 | "longitude": [ 36 | "p-longitude" 37 | ] 38 | }, 39 | "rels": { 40 | "bookmark": [ 41 | "u-url" 42 | ], 43 | "tag": [ 44 | "p-category" 45 | ] 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hfeed.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-feed" 4 | ], 5 | "properties": { 6 | "category": [ 7 | "p-category" 8 | ], 9 | "site-description": [ 10 | "p-summary" 11 | ], 12 | "description": [ 13 | "p-summary" 14 | ], 15 | "site-title": [ 16 | "p-name" 17 | ], 18 | "title": [ 19 | "p-name" 20 | ] 21 | }, 22 | "rels": { 23 | "tag": [ 24 | "p-category" 25 | ] 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hproduct.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-product" 4 | ], 5 | "properties": { 6 | "category": [ 7 | "p-category" 8 | ], 9 | "price": [ 10 | "p-price" 11 | ], 12 | "description": [ 13 | "p-description" 14 | ], 15 | "url": [ 16 | "u-url" 17 | ], 18 | "photo": [ 19 | "u-photo" 20 | ], 21 | "brand": [ 22 | "p-brand" 23 | ], 24 | "identifier": [ 25 | "u-identifier" 26 | ], 27 | "review": [ 28 | "p-review", 29 | "h-review" 30 | ], 31 | "fn": [ 32 | "p-name" 33 | ] 34 | } 35 | } 36 | 
-------------------------------------------------------------------------------- /mf2py/backcompat-rules/hrecipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-recipe" 4 | ], 5 | "properties": { 6 | "nutrition": [ 7 | "p-nutrition" 8 | ], 9 | "yield": [ 10 | "p-yield" 11 | ], 12 | "author": [ 13 | "p-author", 14 | "h-card" 15 | ], 16 | "duration": [ 17 | "dt-duration" 18 | ], 19 | "photo": [ 20 | "u-photo" 21 | ], 22 | "instructions": [ 23 | "e-instructions" 24 | ], 25 | "summary": [ 26 | "p-summary" 27 | ], 28 | "fn": [ 29 | "p-name" 30 | ], 31 | "ingredient": [ 32 | "p-ingredient" 33 | ], 34 | "category": [ 35 | "p-category" 36 | ] 37 | }, 38 | "rels": { 39 | "tag": [ 40 | "p-category" 41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hresume.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-resume" 4 | ], 5 | "properties": { 6 | "experience": [ 7 | "h-event", 8 | "p-experience" 9 | ], 10 | "summary": [ 11 | "p-summary" 12 | ], 13 | "affiliation": [ 14 | "p-affiliation", 15 | "h-card" 16 | ], 17 | "contact": [ 18 | "h-card", 19 | "p-contact" 20 | ], 21 | "skill": [ 22 | "p-skill" 23 | ], 24 | "education": [ 25 | "h-event", 26 | "p-education" 27 | ] 28 | } 29 | } -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hreview-aggregate.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-review-aggregate" 4 | ], 5 | "properties": { 6 | "rating": [ 7 | "p-rating" 8 | ], 9 | "description": [ 10 | "p-description" 11 | ], 12 | "photo": [ 13 | "u-photo" 14 | ], 15 | "worst": [ 16 | "p-worst" 17 | ], 18 | "reviewer": [ 19 | "p-reviewer", 20 | "p-author", 21 | "h-card" 22 | ], 23 | "best": [ 24 | "p-best" 25 | ], 26 | "count": [ 27 | "p-count" 28 | ], 29 
| "votes": [ 30 | "p-votes" 31 | ], 32 | "dtreviewed": [ 33 | "dt-reviewed" 34 | ], 35 | "url": [ 36 | "u-url" 37 | ], 38 | "summary": [ 39 | "p-name" 40 | ], 41 | "fn": [ 42 | "p-item", 43 | "h-item", 44 | "p-name" 45 | ] 46 | } 47 | } -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-review" 4 | ], 5 | "properties": { 6 | "rating": [ 7 | "p-rating" 8 | ], 9 | "worst": [ 10 | "p-worst" 11 | ], 12 | "dtreviewed": [ 13 | "dt-reviewed" 14 | ], 15 | "reviewer": [ 16 | "p-author", 17 | "h-card" 18 | ], 19 | "url": [ 20 | "p-item", 21 | "h-item", 22 | "u-url" 23 | ], 24 | "photo": [ 25 | "p-item", 26 | "h-item", 27 | "u-photo" 28 | ], 29 | "best": [ 30 | "p-best" 31 | ], 32 | "description": [ 33 | "p-description" 34 | ], 35 | "fn": [ 36 | "p-item", 37 | "h-item", 38 | "p-name" 39 | ], 40 | "summary": [ 41 | "p-name" 42 | ], 43 | "item vcard": [ 44 | "p-item", 45 | "vcard" 46 | ], 47 | "item vevent": [ 48 | "p-item", 49 | "vevent" 50 | ], 51 | "item hproduct": [ 52 | "p-item", 53 | "hproduct" 54 | ] 55 | }, 56 | "rels": { 57 | "self bookmark": [ 58 | "u-url" 59 | ], 60 | "tag": [ 61 | "p-category" 62 | ] 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/recipe-main-info.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-recipe" 4 | ], 5 | "properties": { 6 | "content-title__text": ["p-name"], 7 | "recipe-ingredients__list-item": ["p-ingredient"], 8 | "recipe-metadata__serving": ["p-yield"], 9 | "recipe-method-wrapper": ["e-instructions"], 10 | "recipe-metadata__prep-time": ["p-duration"], 11 | "nutrition": ["p-nutrition"], 12 | "recipe-media__image": ["u-photo"], 13 | "recipe-description__text": ["p-summary"], 14 | "recipe-ingredients__link":["p-category"], 15 | 
"chef": ["p-author", "h-card"], 16 | "chef__image": ["u-photo"], 17 | "chef__link": ["p-name"] 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/vcard.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-card" 4 | ], 5 | "properties": { 6 | "tel": [ 7 | "p-tel" 8 | ], 9 | "honorific-suffix": [ 10 | "p-honorific-suffix" 11 | ], 12 | "family-name": [ 13 | "p-family-name" 14 | ], 15 | "photo": [ 16 | "u-photo" 17 | ], 18 | "logo": [ 19 | "u-logo" 20 | ], 21 | "postal-code": [ 22 | "p-postal-code" 23 | ], 24 | "country-name": [ 25 | "p-country-name" 26 | ], 27 | "uid": [ 28 | "u-uid" 29 | ], 30 | "category": [ 31 | "p-category" 32 | ], 33 | "adr": [ 34 | "p-adr", 35 | "h-adr" 36 | ], 37 | "locality": [ 38 | "p-locality" 39 | ], 40 | "nickname": [ 41 | "p-nickname" 42 | ], 43 | "label": [ 44 | "p-label" 45 | ], 46 | "note": [ 47 | "p-note" 48 | ], 49 | "street-address": [ 50 | "p-street-address" 51 | ], 52 | "latitude": [ 53 | "p-latitude" 54 | ], 55 | "email": [ 56 | "u-email" 57 | ], 58 | "bday": [ 59 | "dt-bday" 60 | ], 61 | "extended-address": [ 62 | "p-extended-address" 63 | ], 64 | "additional-name": [ 65 | "p-additional-name" 66 | ], 67 | "organization-unit": [ 68 | "p-organization-unit" 69 | ], 70 | "given-name": [ 71 | "p-given-name" 72 | ], 73 | "key": [ 74 | "u-key" 75 | ], 76 | "org": [ 77 | "p-org" 78 | ], 79 | "honorific-prefix": [ 80 | "p-honorific-prefix" 81 | ], 82 | "geo": [ 83 | "p-geo", 84 | "h-geo" 85 | ], 86 | "fn": [ 87 | "p-name" 88 | ], 89 | "url": [ 90 | "u-url" 91 | ], 92 | "region": [ 93 | "p-region" 94 | ], 95 | "longitude": [ 96 | "p-longitude" 97 | ], 98 | "organization-name": [ 99 | "p-organization-name" 100 | ], 101 | "title": [ 102 | "p-job-title" 103 | ], 104 | "role": [ 105 | "p-role" 106 | ] 107 | } 108 | } 109 | -------------------------------------------------------------------------------- 
/mf2py/backcompat-rules/vevent.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-event" 4 | ], 5 | "properties": { 6 | "attendee": [ 7 | "p-attendee" 8 | ], 9 | "description": [ 10 | "p-description" 11 | ], 12 | "duration": [ 13 | "dt-duration" 14 | ], 15 | "dtend": [ 16 | "dt-end" 17 | ], 18 | "dtstart": [ 19 | "dt-start" 20 | ], 21 | "geo": [ 22 | "p-location h-geo" 23 | ], 24 | "organizer": [ 25 | "p-organizer" 26 | ], 27 | "category": [ 28 | "p-category" 29 | ], 30 | "url": [ 31 | "u-url" 32 | ], 33 | "summary": [ 34 | "p-name" 35 | ], 36 | "contact": [ 37 | "p-contact" 38 | ], 39 | "location": [ 40 | "p-location" 41 | ] 42 | } 43 | } -------------------------------------------------------------------------------- /mf2py/datetime_helpers.py: -------------------------------------------------------------------------------- 1 | '''helper functions to deal wit datetime strings''' 2 | from __future__ import unicode_literals, print_function 3 | 4 | import re 5 | from datetime import datetime 6 | 7 | # REGEX! 8 | 9 | DATE_RE = r'(\d{4}-\d{2}-\d{2})|(\d{4}-\d{3})' 10 | SEC_RE = r'(:(?P\d{2})(\.\d+)?)' 11 | RAWTIME_RE = r'(?P\d{1,2})(:(?P\d{2})%s?)?' % (SEC_RE) 12 | AMPM_RE = r'am|pm|a\.m\.|p\.m\.|AM|PM|A\.M\.|P\.M\.' 13 | TIMEZONE_RE = r'Z|[+-]\d{1,2}:?\d{2}?' 14 | TIME_RE = (r'(?P%s)( ?(?P%s))?( ?(?P%s))?' % 15 | (RAWTIME_RE, AMPM_RE, TIMEZONE_RE)) 16 | DATETIME_RE = (r'(?P%s)(?P[T ])(?P