├── .gitignore ├── README.md ├── doc ├── example_output_html.txt └── example_output_screenshot.png ├── lib ├── bottle.py ├── html5lib │ ├── __init__.py │ ├── constants.py │ ├── filters │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── formfiller.py │ │ ├── inject_meta_charset.py │ │ ├── lint.py │ │ ├── optionaltags.py │ │ ├── sanitizer.py │ │ └── whitespace.py │ ├── html5parser.py │ ├── ihatexml.py │ ├── inputstream.py │ ├── sanitizer.py │ ├── serializer │ │ ├── __init__.py │ │ ├── htmlserializer.py │ │ └── xhtmlserializer.py │ ├── tests │ │ ├── README │ │ ├── __init__.py │ │ ├── mockParser.py │ │ ├── performance │ │ │ └── concatenation.py │ │ ├── runparsertests.py │ │ ├── runtests.py │ │ ├── support.py │ │ ├── test_encoding.py │ │ ├── test_formfiller.py │ │ ├── test_parser.py │ │ ├── test_parser2.py │ │ ├── test_sanitizer.py │ │ ├── test_serializer.py │ │ ├── test_stream.py │ │ ├── test_tokenizer.py │ │ ├── test_treewalkers.py │ │ ├── test_whitespace_filter.py │ │ ├── tokenizertotree.py │ │ ├── us-ascii.html │ │ └── utf-8-bom.html │ ├── tokenizer.py │ ├── treebuilders │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── dom.py │ │ ├── etree.py │ │ ├── etree_lxml.py │ │ ├── simpletree.py │ │ └── soup.py │ ├── treewalkers │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── dom.py │ │ ├── etree.py │ │ ├── genshistream.py │ │ ├── lxmletree.py │ │ ├── pulldom.py │ │ ├── simpletree.py │ │ └── soup.py │ └── utils.py ├── rdfextras │ ├── __init__.py │ ├── sparql │ │ ├── __init__.py │ │ ├── algebra.py │ │ ├── components.py │ │ ├── evaluate.py │ │ ├── graph.py │ │ ├── operators.py │ │ ├── parser.py │ │ ├── processor.py │ │ ├── pyparsing.py │ │ └── query.py │ └── tools │ │ ├── CSVWriter.py │ │ ├── DatabaseStats.py │ │ ├── EARLPlugin.py │ │ ├── FixTypeViews.py │ │ ├── QueryRunner.py │ │ ├── QueryStats.py │ │ ├── __init__.py │ │ ├── describer.py │ │ ├── pathutils.py │ │ ├── rdfpipe.py │ │ └── sparqler.py ├── rdflib │ ├── LICENSE │ ├── __init__.py │ ├── collection.py │ ├── compare.py │ ├── 
events.py │ ├── exceptions.py │ ├── graph.py │ ├── namespace.py │ ├── parser.py │ ├── plugin.py │ ├── plugins │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── parsers │ │ │ ├── __init__.py │ │ │ ├── notation3.py │ │ │ ├── nt.py │ │ │ ├── ntriples.py │ │ │ ├── rdfa │ │ │ │ ├── __init__.py │ │ │ │ ├── embeddedrdf.py │ │ │ │ ├── literal.py │ │ │ │ ├── options.py │ │ │ │ ├── parse.py │ │ │ │ ├── state.py │ │ │ │ └── transform │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── headabout.py │ │ │ ├── rdfxml.py │ │ │ └── trix.py │ │ ├── serializers │ │ │ ├── __init__.py │ │ │ ├── n3.py │ │ │ ├── nt.py │ │ │ ├── rdfxml.py │ │ │ ├── trix.py │ │ │ ├── turtle.py │ │ │ └── xmlwriter.py │ │ └── sleepycat.py │ ├── query.py │ ├── serializer.py │ ├── store.py │ ├── term.py │ └── util.py ├── rdflib_schemaorg_csv.py └── simplejson │ ├── __init__.py │ ├── _speedups.c │ ├── decoder.py │ ├── encoder.py │ ├── scanner.py │ └── tool.py ├── mappings ├── dbpedia-2011-07-31.rdf ├── schema-org-all.json └── sioc-2011-08-01.rdf ├── templates ├── base.tpl ├── d_table.css ├── d_table_jui.css ├── jquery-ui-1.8.4.custom.css ├── jquery.dataTables.min.js ├── jquery.js └── web.instata-style.css ├── test ├── potd_0.csv └── potd_1.csv ├── web.instata.config └── web.instata.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /doc/example_output_html.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
Recipe | name | author | publishDate
bb | Mom's World Famous Banana Bread | John Smith | May 8, 2009
mf | Michael's favourite salad | Michael Hausenblas | July 31, 2011
t1 | Test 1 | Michael Hausenblas | July 31, 2011
t2 | Test 2 | Michael Hausenblas | July 31, 2011
class Filter(object):
    """Base class for stream filters that wrap another iterable.

    A filter holds a ``source`` object, iterates over it, and forwards any
    attribute it does not define itself to that source. Subclasses
    typically override ``__iter__`` to transform the items they yield.
    """

    def __init__(self, source):
        """Remember *source*, the object this filter wraps."""
        self.source = source

    def __iter__(self):
        """Return an iterator over the wrapped source, unchanged."""
        return iter(self.source)

    def __getattr__(self, name):
        """Look up *name* on the wrapped source.

        Python only calls this after normal attribute lookup on the filter
        has failed, so attributes defined on the filter always win.

        Raises:
            AttributeError: if the source has no such attribute, or if the
                filter has no ``source`` attribute at all.
        """
        # If ``source`` itself is missing (instance built without running
        # __init__, e.g. via __new__ while copying or unpickling), reading
        # self.source below would re-enter __getattr__ endlessly. Fail with
        # the conventional AttributeError instead of a recursion error.
        if name == "source":
            raise AttributeError(name)
        return getattr(self.source, name)
-------------------------------------------------------------------------------- 1 | # 2 | # The goal is to finally have a form filler where you pass data for 3 | # each form, using the algorithm for "Seeding a form with initial values" 4 | # See http://www.whatwg.org/specs/web-forms/current-work/#seeding 5 | # 6 | 7 | import _base 8 | 9 | from html5lib.constants import spaceCharacters 10 | spaceCharacters = u"".join(spaceCharacters) 11 | 12 | class SimpleFilter(_base.Filter): 13 | def __init__(self, source, fieldStorage): 14 | _base.Filter.__init__(self, source) 15 | self.fieldStorage = fieldStorage 16 | 17 | def __iter__(self): 18 | field_indices = {} 19 | state = None 20 | field_name = None 21 | for token in _base.Filter.__iter__(self): 22 | type = token["type"] 23 | if type in ("StartTag", "EmptyTag"): 24 | name = token["name"].lower() 25 | if name == "input": 26 | field_name = None 27 | field_type = None 28 | input_value_index = -1 29 | input_checked_index = -1 30 | for i,(n,v) in enumerate(token["data"]): 31 | n = n.lower() 32 | if n == u"name": 33 | field_name = v.strip(spaceCharacters) 34 | elif n == u"type": 35 | field_type = v.strip(spaceCharacters) 36 | elif n == u"checked": 37 | input_checked_index = i 38 | elif n == u"value": 39 | input_value_index = i 40 | 41 | value_list = self.fieldStorage.getlist(field_name) 42 | field_index = field_indices.setdefault(field_name, 0) 43 | if field_index < len(value_list): 44 | value = value_list[field_index] 45 | else: 46 | value = "" 47 | 48 | if field_type in (u"checkbox", u"radio"): 49 | if value_list: 50 | if token["data"][input_value_index][1] == value: 51 | if input_checked_index < 0: 52 | token["data"].append((u"checked", u"")) 53 | field_indices[field_name] = field_index + 1 54 | elif input_checked_index >= 0: 55 | del token["data"][input_checked_index] 56 | 57 | elif field_type not in (u"button", u"submit", u"reset"): 58 | if input_value_index >= 0: 59 | token["data"][input_value_index] = (u"value", value) 
60 | else: 61 | token["data"].append((u"value", value)) 62 | field_indices[field_name] = field_index + 1 63 | 64 | field_type = None 65 | field_name = None 66 | 67 | elif name == "textarea": 68 | field_type = "textarea" 69 | field_name = dict((token["data"])[::-1])["name"] 70 | 71 | elif name == "select": 72 | field_type = "select" 73 | attributes = dict(token["data"][::-1]) 74 | field_name = attributes.get("name") 75 | is_select_multiple = "multiple" in attributes 76 | is_selected_option_found = False 77 | 78 | elif field_type == "select" and field_name and name == "option": 79 | option_selected_index = -1 80 | option_value = None 81 | for i,(n,v) in enumerate(token["data"]): 82 | n = n.lower() 83 | if n == "selected": 84 | option_selected_index = i 85 | elif n == "value": 86 | option_value = v.strip(spaceCharacters) 87 | if option_value is None: 88 | raise NotImplementedError("