├── tests ├── __init__.py ├── test_resource.py ├── test_iri.py ├── test_uri.py ├── cases.py └── test_template.py ├── MANIFEST.in ├── uricore ├── __init__.py ├── wkz_internal.py ├── template.py ├── wkz_wsgi.py ├── core.py ├── wkz_datastructures.py └── wkz_urls.py ├── .travis.yml ├── .gitignore ├── setup.py ├── README.md └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE 2 | -------------------------------------------------------------------------------- /uricore/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | __all__ = ['IRI', 'URI'] 3 | 4 | from uricore.core import IRI, URI 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 2.6 4 | - 2.7 5 | script: nosetests 6 | install: 7 | - pip install nose 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | .noseids 3 | 4 | # Packages 5 | *.egg 6 | *.egg-info 7 | dist 8 | build 9 | eggs 10 | parts 11 | bin 12 | var 13 | sdist 14 | develop-eggs 15 | .installed.cfg 16 | 17 | # Installer logs 18 | pip-log.txt 19 | 20 | # Unit test / coverage reports 21 | .coverage 22 | .tox 23 | 24 | #Translations 25 | *.mo 26 | 27 | #Mr Developer 28 | .mr.developer.cfg 29 | -------------------------------------------------------------------------------- /uricore/wkz_internal.py: -------------------------------------------------------------------------------- 1 | class _Missing(object): 2 | 3 | def __repr__(self): 4 | return 'no value' 5 | 6 | def __reduce__(self): 7 | return '_missing' 8 | 9 | _missing = _Missing() 10 | 11 | 12 | def _decode_unicode(value, charset, errors): 13 | """Like the regular decode function but this one raises an 14 | `HTTPUnicodeError` if errors is `strict`.""" 15 | fallback = None 16 | if errors.startswith('fallback:'): 17 | fallback = errors[9:] 18 | errors = 'strict' 19 | try: 20 | return value.decode(charset, errors) 21 | except UnicodeError, e: 22 | if fallback is not None: 23 | return value.decode(fallback, 'replace') 24 | from werkzeug.exceptions import HTTPUnicodeError 25 | raise HTTPUnicodeError(str(e)) 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | from setuptools import setup 5 | 6 | 7 | setup( 8 | name='uricore', 9 | version='0.1.2', 10 | description='URI Parsing for Humans.', 11 | long_description=open('README.md').read(), 12 | author='Matthew Hooker & Jeremy Avnet & Matt Chisholm', 13 | author_email='uricore@librelist.com', 14 | url='https://github.com/core/uricore', 15 | packages= ['uricore',], 16 | license='BSD', 17 | classifiers=( 18 | 'Development Status :: 4 - Beta', 19 | 'Intended Audience :: Developers', 20 | 'Natural Language :: English', 21 | 'License :: OSI Approved :: BSD License', 22 | 'Programming Language :: Python', 23 | 'Programming Language :: Python :: 2.6', 24 | 'Programming Language :: Python :: 2.7', 25 | ) 26 | ) 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # urilib 2 | 3 | [![Build Status](https://secure.travis-ci.org/core/uricore.png?branch=master)](http://travis-ci.org/core/uricore) 4 | 5 | **WARNING: Rough, raw, and fast changing code. Check back later. ;-)** 6 | 7 | -- 8 | 9 | Example of use: 10 | 11 | >>> from httpcore.uri import URI 12 | >>> from httpcore.iri import IRI 13 | >>> iri = IRI(u'http://\N{SNOWMAN}/') 14 | >>> iri 15 | IRI(u'http://\u2603/') 16 | >>> uri = URI(iri) 17 | >>> uri 18 | URI('http://xn--n3h/') 19 | >>> iri.netloc 20 | u'http://\u2603/' 21 | >>> iri.hostname 22 | '\u2603' 23 | >>> iri.port is None 24 | True 25 | >>> iri.path 26 | u'/' 27 | >>> hasattr(iri, '__hash__') 28 | True 29 | >>> iri.replace(port=8000) 30 | IRI(u'http://\u2603:8000/') 31 | >>> iriq = iri.update_query({'foo': u'42'}) 32 | >>> iriq 33 | IRI(u'http://\u2603/?foo=42') 34 | >>> iriq.update_query(foo=None) 35 | IRI(u'http://\u2603/') 36 | >>> iriq.query 37 | MultiDict([('foo', '42')]) 38 | >>> URI.from_template('http://{domain}/find{?year*}', domain="example.com", 39 | ... year=("1965", "2000", "2012")) 40 | URI('http://example.com/find?year=1965&year=2000&year=2012') 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Matthew Hooker & Jeremy Avnet & Matt Chisholm 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 9 | -------------------------------------------------------------------------------- /tests/test_resource.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import unittest 3 | 4 | from nose.plugins.skip import SkipTest 5 | 6 | from uricore import IRI, URI 7 | from uricore.wkz_datastructures import MultiDict 8 | 9 | 10 | class TestURICore(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.uri = URI("http://example.com?foo=bar") 14 | 15 | def test_hashability(self): 16 | iri1 = IRI(u'http://\N{CLOUD}/') 17 | iri2 = IRI(u'http://\N{CLOUD}/') 18 | self.assertEquals(hash(iri1), hash(iri2)) 19 | 20 | uri1 = URI(iri1) 21 | uri2 = URI('http://xn--l3h/') 22 | self.assertEquals(hash(uri1), hash(uri2)) 23 | 24 | self.assertNotEquals(hash(iri1), hash(uri1)) 25 | 26 | def test_eq(self): 27 | iri1 = IRI(u'http://\N{CLOUD}/') 28 | iri2 = IRI(u'http://\N{CLOUD}/') 29 | self.assertEquals(iri1, iri2) 30 | 31 | uri1 = URI(iri1) 32 | uri2 = URI('http://xn--l3h/') 33 | self.assertEquals(uri1, uri2) 34 | 35 | self.assertNotEquals(iri1, uri1) 36 | 37 | def test_ne(self): 38 | iri1 = IRI(u'http://\N{CLOUD}/') 39 | iri2 = IRI(u'http://\N{CLOUD}/') 40 | self.assertFalse(iri1 != iri2) 41 | 42 | uri1 = URI(iri1) 43 | uri2 = URI('http://xn--l3h/') 44 | self.assertFalse(uri1 != uri2) 45 | 46 | self.assertTrue(iri1 != uri1) 47 | 48 | def test_query_param_breaks_equality_(self): 49 | iri = IRI(u'http://\N{CLOUD}/') 50 | iri2 = IRI(u'http://\N{CLOUD}/?q=a') 51 | self.assertNotEquals(iri, iri2) 52 | 53 | def test_iri_add_port(self): 54 | iri = IRI(u'http://\N{SNOWMAN}/') 55 | new_iri = iri.update(port=8000) 56 | self.assertEquals(iri.netloc + ':8000', new_iri.netloc) 57 | self.assertEquals(new_iri.port, '8000') 58 | self.assertEquals(iri.port, None) 59 | 60 | def test_iri_update_query(self): 61 | iri = IRI(u'http://\N{SNOWMAN}/') 62 | q = iri.query 63 | q.update({'foo': u'42'}) 64 | iri2 = iri.update_query(q) 65 | self.assertNotEquals(iri, iri2) 66 | self.assertTrue(isinstance(iri2, IRI)) 67 | self.assertEquals(repr(iri.query), "MultiDict([])") 68 | self.assertEquals(repr(iri2), "IRI(u'http://\u2603/?foo=42')") 69 | self.assertEquals(repr(iri2.query), "MultiDict([('foo', u'42')])") 70 | 71 | def test_query_is_immutable(self): 72 | self.uri.query.add("foo", "baz") 73 | self.assertEquals(set(['bar']), set(self.uri.query.getlist('foo'))) 74 | 75 | def test_configurable_multi_dict_class(self): 76 | class CustomMultiDict(MultiDict): 77 | pass 78 | iri = IRI(u'http://\N{SNOWMAN}/', query_cls=CustomMultiDict) 79 | self.assertTrue(isinstance(iri.query, CustomMultiDict)) 80 | -------------------------------------------------------------------------------- /uricore/template.py: -------------------------------------------------------------------------------- 1 | import re 2 | from uricore.wkz_urls import url_quote 3 | 4 | 5 | def _format_mapping(operator, item): 6 | try: 7 | k, v, mapped = item 8 | except ValueError: 9 | k, v = item 10 | mapped = False 11 | 12 | if operator in ['#', '+']: 13 | # From http://tools.ietf.org/html/rfc6570#section-1.5 14 | safe = ':/?#[]@!$&\'\"()*/+,;=' 15 | else: 16 | safe = '' 17 | 18 | if isinstance(v, (list, tuple)): 19 | v = ','.join(url_quote(x, safe=safe) for x in v) 20 | else: 21 | v = url_quote(v, safe=safe) 22 | 23 | if operator in [';', '?', '&'] or mapped: 24 | if not v: 25 | mid = '' if operator == ';' else '=' 26 | else: 27 | mid = '=' 28 | 29 | return u"{0}{1}{2}".format(url_quote(k, safe=safe), mid, v) 30 | else: 31 | return u"{0}".format(v) 32 | 33 | 34 | def _template_joiner(operator): 35 | if operator in ['#', '+', '']: 36 | return ',' 37 | elif operator == '?': 38 | return '&' 39 | elif operator == '.': 40 | return'.' 41 | return operator 42 | 43 | 44 | def _varspec_expansion(operator, varspec, data): 45 | portion = None 46 | explode = False 47 | 48 | if ':' in varspec: 49 | varspec, portion = varspec.split(':', 1) 50 | portion = int(portion) 51 | 52 | if varspec.endswith('*'): 53 | varspec = varspec[:-1] 54 | explode = True 55 | 56 | value = data.get(varspec) 57 | 58 | if value == None: 59 | return [] 60 | 61 | try: 62 | if len(value) == 0 and value != "": 63 | return [] 64 | except TypeError: 65 | pass 66 | 67 | try: 68 | if explode: 69 | return [(k, v, True) for k,v in value.iteritems()] 70 | else: 71 | parts = [] 72 | for k, v in value.iteritems(): 73 | parts += [k, v] 74 | return [(varspec, parts)] 75 | except AttributeError: 76 | pass 77 | 78 | if isinstance(value, (list, tuple)): 79 | if explode: 80 | return [(varspec, v) for v in value] 81 | else: 82 | return [(varspec, value)] 83 | 84 | value = unicode(value) 85 | 86 | if portion is not None: 87 | value = value[:portion] 88 | 89 | return [(varspec, value)] 90 | 91 | 92 | def uri_template(template, **kwargs): 93 | 94 | def template_expansion(matchobj): 95 | varlist = matchobj.group(1) 96 | operator = '' 97 | 98 | if re.match(r"\+|#|\.|/|;|\?|&", varlist): 99 | operator = varlist[0] 100 | varlist = varlist[1:] 101 | 102 | prefix = '' if operator == '+' else operator 103 | joiner = _template_joiner(operator) 104 | 105 | params = [] 106 | for varspec in varlist.split(','): 107 | params += _varspec_expansion(operator, varspec, kwargs) 108 | 109 | uri = [_format_mapping(operator, item) for item in params] 110 | 111 | if not uri: 112 | return "" 113 | 114 | return prefix + joiner.join(uri) 115 | 116 | return re.sub(r"{(.*?)}", template_expansion, template) 117 | -------------------------------------------------------------------------------- /tests/test_iri.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | import unittest 4 | 5 | from nose.plugins.skip import SkipTest 6 | 7 | from uricore import IRI, URI 8 | from uricore.wkz_datastructures import MultiDict 9 | 10 | import cases 11 | 12 | 13 | class TestIRI(unittest.TestCase): 14 | 15 | def test_str_input_fails(self): 16 | self.assertRaises(TypeError, IRI, 'http://example.com'.encode('ascii')) 17 | 18 | def test_uri_input(self): 19 | iri = TestIRISnowman.ri 20 | uri = URI(iri) 21 | self.assertEquals(str(iri), str(IRI(uri))) 22 | self.assertEquals(unicode(iri), unicode(IRI(uri))) 23 | 24 | def test_repr(self): 25 | iri = TestIRISnowman.ri 26 | eval_iri = eval(repr(iri)) 27 | self.assertEquals(iri, eval_iri) 28 | 29 | def test_idn_ascii_encoding(self): 30 | iri = IRI("http://Bücher.ch/") 31 | self.assertEquals(str(iri), "http://xn--bcher-kva.ch/") 32 | 33 | def test_convert_pile_of_poo(self): 34 | raise SkipTest("Not Implemented") 35 | uri = URI("http://u:p@www.xn--ls8h.la:80/path?q=arg#frag".encode('utf-8')) 36 | try: 37 | IRI(uri) 38 | except Exception as e: 39 | assert False, "{0} {1}".format(type(e), e) 40 | 41 | def test_non_existent_scheme(self): 42 | try: 43 | IRI("watwatwat://wat.wat/wat") 44 | except Exception as e: 45 | assert False, "{0} {1}".format(type(e), e) 46 | 47 | def test_nonascii_query_keys(self): 48 | IRI(u'http://example.com/?gro\xdf=great') 49 | 50 | def test_iri_from_lenient(self): 51 | lenient_iri = IRI.from_lenient(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)') 52 | self.assertEquals(repr(lenient_iri), "IRI(u'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29')") 53 | 54 | 55 | class TestIRISnowman(cases.IdentifierCase): 56 | 57 | ri = IRI("http://u:p@www.\N{SNOWMAN}:80/path?q=arg#frag") 58 | expect = dict( 59 | scheme="http", 60 | auth="u:p", 61 | hostname="www.\u2603", 62 | port="80", 63 | path="/path", 64 | query=MultiDict([('q', 'arg')]), 65 | querystr='q=arg', 66 | fragment="frag", 67 | netloc="u:p@www.\u2603:80", 68 | ) 69 | 70 | 71 | class TestIRIConvertedSnowman(cases.IdentifierCase): 72 | 73 | uri = URI(("http://u:p@www.%s:80/path?q=arg#frag" 74 | % u"\N{SNOWMAN}".encode('idna')).encode('utf-8')) 75 | ri = IRI(uri) 76 | expect = dict( 77 | scheme="http", 78 | auth="u:p", 79 | hostname="www.\u2603", 80 | port="80", 81 | path="/path", 82 | query=MultiDict([('q', 'arg')]), 83 | querystr='q=arg', 84 | fragment="frag", 85 | netloc="u:p@www.\u2603:80", 86 | ) 87 | 88 | 89 | class TestIRIPileOfPoo(cases.IdentifierCase): 90 | 91 | ri = IRI("http://u:p@www.💩.la:80/path?q=arg#frag") 92 | expect = dict( 93 | scheme="http", 94 | auth="u:p", 95 | hostname="www.💩.la", 96 | port="80", 97 | path="/path", 98 | query=MultiDict([('q', 'arg')]), 99 | querystr='q=arg', 100 | fragment="frag", 101 | netloc="u:p@www.💩.la:80", 102 | ) 103 | 104 | 105 | class TestIRIIPv6(cases.IdentifierCase): 106 | 107 | ri = IRI("http://u:p@[2a00:1450:4001:c01::67]/path?q=arg#frag") 108 | expect = dict( 109 | scheme="http", 110 | auth="u:p", 111 | hostname="2a00:1450:4001:c01::67", 112 | port=None, 113 | path="/path", 114 | query=MultiDict([('q', 'arg')]), 115 | querystr='q=arg', 116 | fragment="frag", 117 | netloc="u:p@[2a00:1450:4001:c01::67]", 118 | ) 119 | 120 | 121 | class TestIRIIPv6WithPort(cases.IdentifierCase): 122 | 123 | ri = IRI("http://u:p@[2a00:1450:4001:c01::67]:80/path?q=arg#frag") 124 | expect = dict( 125 | scheme="http", 126 | auth="u:p", 127 | hostname="2a00:1450:4001:c01::67", 128 | port="80", 129 | path="/path", 130 | query=MultiDict([('q', 'arg')]), 131 | querystr='q=arg', 132 | fragment="frag", 133 | netloc="u:p@[2a00:1450:4001:c01::67]:80", 134 | ) 135 | 136 | 137 | class TestIRIJoin(cases.JoinAndUpdateCase): 138 | 139 | RI = IRI 140 | 141 | def test_cannot_join_uri(self): 142 | self.assertRaises(TypeError, 143 | IRI('http://localhost:8000').join, 144 | URI(str('/path/to/file')) 145 | ) 146 | 147 | 148 | class TestIRINormalizes(cases.NormalizeCase): 149 | 150 | RI = IRI 151 | -------------------------------------------------------------------------------- /tests/test_uri.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import unittest 3 | 4 | from nose.plugins.skip import SkipTest 5 | 6 | from uricore import IRI, URI 7 | from uricore.wkz_datastructures import MultiDict 8 | 9 | import cases 10 | 11 | 12 | class TestURI(unittest.TestCase): 13 | 14 | def test_unicode_input_fails(self): 15 | self.assertRaises(TypeError, URI, u"http://www.example.com/") 16 | 17 | def test_iri_input(self): 18 | uri = TestURISnowman.ri 19 | iri = IRI(uri) 20 | self.assertEquals(str(uri), str(URI(iri))) 21 | self.assertEquals(unicode(uri), unicode(URI(iri))) 22 | 23 | def test_repr(self): 24 | uri = TestURISnowman.ri 25 | eval_uri = eval(repr(uri)) 26 | self.assertEquals(uri, eval_uri) 27 | 28 | def test_idn_ascii_encoding(self): 29 | uri = URI(u"http://Bücher.ch/".encode('utf-8')) 30 | self.assertEquals(str(uri), "http://xn--bcher-kva.ch/") 31 | 32 | def test_convert_pile_of_poo(self): 33 | raise SkipTest("Not Implemented") 34 | iri = IRI(u"http://u:p@www.💩.la:80/path?q=arg#frag") 35 | try: 36 | URI(iri) 37 | except Exception as e: 38 | assert False, "{0} {1}".format(type(e), e) 39 | 40 | def test_non_existent_scheme(self): 41 | try: 42 | URI("watwatwat://wat.wat/wat") 43 | except Exception as e: 44 | assert False, "{0} {1}".format(type(e), e) 45 | 46 | def test_uri_from_lenient(self): 47 | lenient_uri = URI.from_lenient(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)'.encode('utf-8')) 48 | self.assertEquals(repr(lenient_uri), "URI('http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29')") 49 | 50 | 51 | class TestURISnowman(cases.IdentifierCase): 52 | 53 | ri = URI("http://u:p@www.%s:80/path?q=arg#frag" 54 | % u"\N{SNOWMAN}".encode('idna')) 55 | expect = dict( 56 | scheme="http", 57 | auth="u:p", 58 | hostname="www.xn--n3h", 59 | port="80", 60 | path="/path", 61 | query=MultiDict([('q', 'arg')]), 62 | querystr='q=arg', 63 | fragment="frag", 64 | netloc="u:p@www.xn--n3h:80", 65 | ) 66 | 67 | 68 | class TestURIConvertedSnowman(cases.IdentifierCase): 69 | 70 | iri = IRI(u"http://u:p@www.\N{SNOWMAN}:80/path?q=arg#frag") 71 | ri = URI(iri) 72 | expect = dict( 73 | scheme="http", 74 | auth="u:p", 75 | hostname="www.xn--n3h", 76 | port="80", 77 | path="/path", 78 | query=MultiDict([('q', 'arg')]), 79 | querystr='q=arg', 80 | fragment="frag", 81 | netloc="u:p@www.xn--n3h:80", 82 | ) 83 | 84 | 85 | class TestURIPileOfPoo(cases.IdentifierCase): 86 | 87 | ri = URI("http://u:p@www.xn--ls8h.la:80/path?q=arg#frag") 88 | expect = dict( 89 | scheme="http", 90 | auth="u:p", 91 | hostname="www.xn--ls8h.la", 92 | port="80", 93 | path="/path", 94 | query=MultiDict([('q', 'arg')]), 95 | querystr='q=arg', 96 | fragment="frag", 97 | netloc="u:p@www.xn--ls8h.la:80", 98 | ) 99 | 100 | 101 | class TestURIIPv6(cases.IdentifierCase): 102 | 103 | ri = URI("http://u:p@[2a00:1450:4001:c01::67]/path?q=arg#frag") 104 | expect = dict( 105 | scheme="http", 106 | auth="u:p", 107 | hostname="2a00:1450:4001:c01::67", 108 | port=None, 109 | path="/path", 110 | query=MultiDict([('q', 'arg')]), 111 | querystr='q=arg', 112 | fragment="frag", 113 | netloc="u:p@[2a00:1450:4001:c01::67]", 114 | ) 115 | 116 | 117 | class TestURIIPv6WithPort(cases.IdentifierCase): 118 | 119 | ri = URI("http://u:p@[2a00:1450:4001:c01::67]:80/path?q=arg#frag") 120 | expect = dict( 121 | scheme="http", 122 | auth="u:p", 123 | hostname="2a00:1450:4001:c01::67", 124 | port="80", 125 | path="/path", 126 | query=MultiDict([('q', 'arg')]), 127 | querystr='q=arg', 128 | fragment="frag", 129 | netloc="u:p@[2a00:1450:4001:c01::67]:80", 130 | ) 131 | 132 | 133 | class TestURIJoin(cases.JoinAndUpdateCase): 134 | 135 | RI = lambda self, s: URI(self._literal_wrapper(s), encoding='utf-8') 136 | 137 | def _literal_wrapper(self, lit): 138 | return lit.encode('utf-8') 139 | 140 | def test_cannot_join_uri(self): 141 | self.assertRaises(TypeError, 142 | self.RI('http://localhost:8000').join, 143 | IRI(u'/path/to/file') 144 | ) 145 | 146 | 147 | class TestURINormalizes(cases.NormalizeCase): 148 | 149 | RI = URI 150 | 151 | def _literal_wrapper(self, lit): 152 | return lit.encode('utf-8') 153 | -------------------------------------------------------------------------------- /tests/cases.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | import unittest 4 | 5 | from uricore.wkz_datastructures import MultiDict 6 | 7 | 8 | class IdentifierCase(unittest.TestCase): 9 | # Test properties and representations 10 | # 11 | # Class variables: 12 | # ri = URI or IRI object 13 | # expect = dict of expected results 14 | 15 | def test_scheme_baby(self): 16 | self.assertEquals(self.ri.scheme, self.expect['scheme']) 17 | 18 | def test_auth(self): 19 | self.assertEquals(self.ri.auth, self.expect['auth']) 20 | 21 | def test_hostname(self): 22 | self.assertEquals(self.ri.hostname, self.expect['hostname']) 23 | 24 | def test_port(self): 25 | self.assertEquals(self.ri.port, self.expect['port']) 26 | 27 | def test_path(self): 28 | self.assertEquals(self.ri.path, self.expect['path']) 29 | 30 | def test_query(self): 31 | self.assertEquals(self.ri.query, self.expect['query']) 32 | 33 | def test_querystr(self): 34 | self.assertEquals(self.ri.querystr, self.expect['querystr']) 35 | 36 | def test_fragment(self): 37 | self.assertEquals(self.ri.fragment, self.expect['fragment']) 38 | 39 | def test_netloc(self): 40 | self.assertEquals(self.ri.netloc, self.expect['netloc']) 41 | 42 | 43 | class JoinAndUpdateCase(unittest.TestCase): 44 | # Test join and update 45 | # 46 | # Class variables: 47 | # RI = IRI/URI constructor given a unicode object 48 | 49 | def _literal_wrapper(self, lit): 50 | return lit 51 | 52 | def test_join_path_to_netloc(self): 53 | ri = self.RI('http://localhost:8000').join(self.RI('/path/to/file')) 54 | self.assertEquals(ri.scheme, 'http') 55 | self.assertEquals(ri.netloc, 'localhost:8000') 56 | self.assertEquals(ri.path, '/path/to/file') 57 | 58 | def test_join_path_to_path(self): 59 | ri = self.RI('http://localhost:8000/here/is/the').join(self.RI('/path/to/file')) 60 | self.assertEquals(ri.scheme, 'http') 61 | self.assertEquals(ri.netloc, 'localhost:8000') 62 | self.assertEquals(ri.path, '/here/is/the/path/to/file') 63 | 64 | def test_join_fragment_and_path(self): 65 | ri = self.RI('http://localhost:8000/here/is/the').join(self.RI('/thing#fragment')) 66 | self.assertEquals(ri.path, '/here/is/the/thing') 67 | self.assertEquals(ri.fragment, 'fragment') 68 | 69 | def test_join_query_to_path(self): 70 | ri = self.RI('http://localhost:8000/path/to/file').join(self.RI('?yes=no&left=right')) 71 | self.assertEquals(ri.path, '/path/to/file') 72 | self.assertEquals(ri.query, MultiDict(dict(yes='no', left='right'))) 73 | self.assertEquals(ri.querystr, 'yes=no&left=right') 74 | 75 | def test_join_query_to_query(self): 76 | ri = self.RI('http://localhost:8000/path/to/file?yes=no').join(self.RI('?left=right')) 77 | self.assertEquals(ri.path, '/path/to/file') 78 | self.assertEquals(ri.query, MultiDict(dict(yes='no', left='right'))) 79 | self.assertEquals(ri.querystr, 'yes=no&left=right') 80 | 81 | def test_join_query_to_query_to_make_multi_query(self): 82 | ri = self.RI('http://localhost:8000/path/to/file?yes=no').join(self.RI('?yes=maybe&left=right')) 83 | self.assertEquals(ri.path, '/path/to/file') 84 | self.assertEquals(ri.query, 85 | MultiDict([('yes', 'no'), ('yes', 'maybe'), ('left', 'right')])) 86 | self.assertEquals(ri.querystr, 'yes=no&yes=maybe&left=right') 87 | 88 | def test_join_nonascii_query_to_query(self): 89 | ri = self.RI('http://localhost:8000/path/to/file?yes=no').join(self.RI('?h%C3%A4us=h%C3%B6f')) 90 | self.assertEquals(ri.path, '/path/to/file') 91 | self.assertEquals(ri.query, MultiDict([('häus'.encode('utf-8'), 'höf'), ('yes', 'no')])) 92 | self.assertEquals(ri.querystr, 'h%C3%A4us=h%C3%B6f&yes=no') 93 | 94 | def test_join_fragment_to_query(self): 95 | ri = self.RI('http://rubberchick.en/path/to/file?yes=no').join(self.RI('#giblets')) 96 | self.assertEquals(ri.path, '/path/to/file') 97 | self.assertEquals(ri.query, MultiDict(dict(yes='no',))) 98 | self.assertEquals(ri.querystr, 'yes=no') 99 | self.assertEquals(ri.fragment, 'giblets') 100 | 101 | def test_join_scheme_with_path(self): 102 | ri = self.RI('gopher://') 103 | result = ri.join(self.RI('nowhere')) 104 | self.assertEquals(result.scheme, 'gopher') 105 | self.assertEquals(result.path, '/nowhere') 106 | 107 | def test_join_no_hostname_with_hostname(self): 108 | ri = self.RI('gopher://') 109 | result = ri.join(self.RI('//whole.org/ville')) 110 | self.assertEquals(result.scheme, 'gopher') 111 | self.assertEquals(result.hostname, 'whole.org') 112 | self.assertEquals(result.path, '/ville') 113 | 114 | def test_join_string(self): 115 | ri = self.RI('http://localhost:8000').join(self._literal_wrapper('/path/to/file')) 116 | self.assertEquals(ri.scheme, 'http') 117 | self.assertEquals(ri.netloc, 'localhost:8000') 118 | self.assertEquals(ri.path, '/path/to/file') 119 | 120 | def test_update_query_with_query_object_to_make_multi_query(self): 121 | ri = self.RI('http://localhost:8000/path/to/file?yes=no') 122 | ri = ri.update_query(MultiDict(dict(yes='maybe', left='right'))) 123 | self.assertEquals(ri.path, '/path/to/file') 124 | self.assertEquals(ri.query, 125 | MultiDict([('yes', 'no'), ('yes', 'maybe'), ('left', 'right')])) 126 | self.assertEquals(ri.querystr, 'yes=no&yes=maybe&left=right') 127 | 128 | def test_update_query_with_nonascii_query_object(self): 129 | ri = self.RI('http://localhost:8000/path/to/file?yes=no') 130 | ri = ri.update_query(MultiDict({'häus':'höf'})) 131 | self.assertEquals(ri.path, '/path/to/file') 132 | self.assertEquals(ri.query, MultiDict([('häus'.encode('utf-8'), 'höf'), ('yes', 'no')])) 133 | self.assertEquals(ri.querystr, 'yes=no&h%C3%A4us=h%C3%B6f') 134 | 135 | 136 | class NormalizeCase(unittest.TestCase): 137 | # Test normalization 138 | # 139 | # Class variables: 140 | # RI = IRI/URI constructor given a unicode object 141 | 142 | def _literal_wrapper(self, lit): 143 | return lit 144 | 145 | def test_normalizes_empty_fragment(self): 146 | ri = self.RI(self._literal_wrapper('http://example.com/#')) 147 | self.assertEquals(ri._identifier, 'http://example.com/') 148 | 149 | def test_normalizes_empty_query(self): 150 | ri = self.RI(self._literal_wrapper('http://example.com/?')) 151 | self.assertEquals(ri._identifier, 'http://example.com/') 152 | 153 | def test_normalizes_empty_query_and_fragment(self): 154 | ri = self.RI(self._literal_wrapper('http://example.com/?#')) 155 | self.assertEquals(ri._identifier, 'http://example.com/') 156 | -------------------------------------------------------------------------------- /uricore/wkz_wsgi.py: -------------------------------------------------------------------------------- 1 | class LimitedStream(object): 2 | """Wraps a stream so that it doesn't read more than n bytes. If the 3 | stream is exhausted and the caller tries to get more bytes from it 4 | :func:`on_exhausted` is called which by default returns an empty 5 | string. The return value of that function is forwarded 6 | to the reader function. So if it returns an empty string 7 | :meth:`read` will return an empty string as well. 8 | 9 | The limit however must never be higher than what the stream can 10 | output. Otherwise :meth:`readlines` will try to read past the 11 | limit. 12 | 13 | The `silent` parameter has no effect if :meth:`is_exhausted` is 14 | overriden by a subclass. 15 | 16 | .. versionchanged:: 0.6 17 | Non-silent usage was deprecated because it causes confusion. 18 | If you want that, override :meth:`is_exhausted` and raise a 19 | :exc:`~exceptions.BadRequest` yourself. 20 | 21 | .. admonition:: Note on WSGI compliance 22 | 23 | calls to :meth:`readline` and :meth:`readlines` are not 24 | WSGI compliant because it passes a size argument to the 25 | readline methods. Unfortunately the WSGI PEP is not safely 26 | implementable without a size argument to :meth:`readline` 27 | because there is no EOF marker in the stream. As a result 28 | of that the use of :meth:`readline` is discouraged. 29 | 30 | For the same reason iterating over the :class:`LimitedStream` 31 | is not portable. It internally calls :meth:`readline`. 32 | 33 | We strongly suggest using :meth:`read` only or using the 34 | :func:`make_line_iter` which safely iterates line-based 35 | over a WSGI input stream. 36 | 37 | :param stream: the stream to wrap. 38 | :param limit: the limit for the stream, must not be longer than 39 | what the string can provide if the stream does not 40 | end with `EOF` (like `wsgi.input`) 41 | :param silent: If set to `True` the stream will allow reading 42 | past the limit and will return an empty string. 43 | """ 44 | 45 | def __init__(self, stream, limit, silent=True): 46 | self._read = stream.read 47 | self._readline = stream.readline 48 | self._pos = 0 49 | self.limit = limit 50 | self.silent = silent 51 | if not silent: 52 | from warnings import warn 53 | warn(DeprecationWarning('non-silent usage of the ' 54 | 'LimitedStream is deprecated. If you want to ' 55 | 'continue to use the stream in non-silent usage ' 56 | 'override on_exhausted.'), stacklevel=2) 57 | 58 | def __iter__(self): 59 | return self 60 | 61 | @property 62 | def is_exhausted(self): 63 | """If the stream is exhausted this attribute is `True`.""" 64 | return self._pos >= self.limit 65 | 66 | def on_exhausted(self): 67 | """This is called when the stream tries to read past the limit. 68 | The return value of this function is returned from the reading 69 | function. 70 | """ 71 | if self.silent: 72 | return '' 73 | from werkzeug.exceptions import BadRequest 74 | raise BadRequest('input stream exhausted') 75 | 76 | def on_disconnect(self): 77 | """What should happen if a disconnect is detected? The return 78 | value of this function is returned from read functions in case 79 | the client went away. By default a 80 | :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised. 81 | """ 82 | from werkzeug.exceptions import ClientDisconnected 83 | raise ClientDisconnected() 84 | 85 | def exhaust(self, chunk_size=1024 * 16): 86 | """Exhaust the stream. This consumes all the data left until the 87 | limit is reached. 88 | 89 | :param chunk_size: the size for a chunk. It will read the chunk 90 | until the stream is exhausted and throw away 91 | the results. 92 | """ 93 | to_read = self.limit - self._pos 94 | chunk = chunk_size 95 | while to_read > 0: 96 | chunk = min(to_read, chunk) 97 | self.read(chunk) 98 | to_read -= chunk 99 | 100 | def read(self, size=None): 101 | """Read `size` bytes or if size is not provided everything is read. 102 | 103 | :param size: the number of bytes read. 104 | """ 105 | if self._pos >= self.limit: 106 | return self.on_exhausted() 107 | if size is None or size == -1: # -1 is for consistence with file 108 | size = self.limit 109 | to_read = min(self.limit - self._pos, size) 110 | try: 111 | read = self._read(to_read) 112 | except (IOError, ValueError): 113 | return self.on_disconnect() 114 | if to_read and len(read) != to_read: 115 | return self.on_disconnect() 116 | self._pos += len(read) 117 | return read 118 | 119 | def readline(self, size=None): 120 | """Reads one line from the stream.""" 121 | if self._pos >= self.limit: 122 | return self.on_exhausted() 123 | if size is None: 124 | size = self.limit - self._pos 125 | else: 126 | size = min(size, self.limit - self._pos) 127 | try: 128 | line = self._readline(size) 129 | except (ValueError, IOError): 130 | return self.on_disconnect() 131 | if size and not line: 132 | return self.on_disconnect() 133 | self._pos += len(line) 134 | return line 135 | 136 | def readlines(self, size=None): 137 | """Reads a file into a list of strings. It calls :meth:`readline` 138 | until the file is read to the end. It does support the optional 139 | `size` argument if the underlaying stream supports it for 140 | `readline`. 141 | """ 142 | last_pos = self._pos 143 | result = [] 144 | if size is not None: 145 | end = min(self.limit, last_pos + size) 146 | else: 147 | end = self.limit 148 | while 1: 149 | if size is not None: 150 | size -= last_pos - self._pos 151 | if self._pos >= end: 152 | break 153 | result.append(self.readline(size)) 154 | if size is not None: 155 | last_pos = self._pos 156 | return result 157 | 158 | def tell(self): 159 | """Returns the position of the stream. 160 | 161 | .. versionadded:: 0.9 162 | """ 163 | return self._pos 164 | 165 | def next(self): 166 | line = self.readline() 167 | if line is None: 168 | raise StopIteration() 169 | return line 170 | 171 | 172 | def make_limited_stream(stream, limit): 173 | """Makes a stream limited.""" 174 | if not isinstance(stream, LimitedStream): 175 | if limit is None: 176 | raise TypeError('stream not limited and no limit provided.') 177 | stream = LimitedStream(stream, limit) 178 | return stream 179 | 180 | 181 | def make_chunk_iter(stream, separator, limit=None, buffer_size=10 * 1024): 182 | """Works like :func:`make_line_iter` but accepts a separator 183 | which divides chunks. If you want newline based processing 184 | you should use :func:`make_limited_stream` instead as it 185 | supports arbitrary newline markers. 186 | 187 | .. versionadded:: 0.8 188 | 189 | .. versionadded:: 0.9 190 | added support for iterators as input stream. 191 | 192 | :param stream: the stream or iterate to iterate over. 193 | :param separator: the separator that divides chunks. 194 | :param limit: the limit in bytes for the stream. (Usually 195 | content length. Not necessary if the `stream` 196 | is a :class:`LimitedStream`. 197 | :param buffer_size: The optional buffer size. 198 | """ 199 | _read = make_chunk_iter_func(stream, limit, buffer_size) 200 | _split = re.compile(r'(%s)' % re.escape(separator)).split 201 | buffer = [] 202 | while 1: 203 | new_data = _read() 204 | if not new_data: 205 | break 206 | chunks = _split(new_data) 207 | new_buf = [] 208 | for item in chain(buffer, chunks): 209 | if item == separator: 210 | yield ''.join(new_buf) 211 | new_buf = [] 212 | else: 213 | new_buf.append(item) 214 | buffer = new_buf 215 | if buffer: 216 | yield ''.join(buffer) 217 | 218 | -------------------------------------------------------------------------------- /uricore/core.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | __all__ = ['IRI', 'URI'] 3 | 4 | try: 5 | import urlparse 6 | except ImportError: 7 | import urllib.parse as urlparse 8 | 9 | from collections import defaultdict 10 | from template import uri_template 11 | 12 | # TODO: import these from httpcore someday 13 | from . import wkz_urls as urls 14 | from . import wkz_datastructures as datastructures 15 | 16 | 17 | def build_netloc(hostname, auth=None, port=None): 18 | auth = "{0}@".format(auth) if auth else "" 19 | port = ":{0}".format(port) if port else "" 20 | if ':' in hostname: 21 | hostname = '['+hostname+']' 22 | if isinstance(hostname, unicode): 23 | return u"{0}{1}{2}".format(auth, hostname, port) 24 | return "{0}{1}{2}".format(auth, hostname, port) 25 | 26 | 27 | def unsplit(**kwargs): 28 | parts = defaultdict(str) 29 | for k in kwargs: 30 | if kwargs[k]: 31 | parts[k] = kwargs[k] 32 | 33 | if 'netloc' in parts: 34 | netloc = parts['netloc'] 35 | else: 36 | netloc = build_netloc(parts['hostname'], parts.get('auth'), 37 | parts.get('port')) 38 | 39 | return urlparse.urlunsplit(( 40 | parts['scheme'], netloc, 41 | parts['path'], parts['querystr'], 42 | parts['fragment'] 43 | )) 44 | 45 | 46 | def identifier_to_dict(identifier): 47 | fields = ('scheme', 'auth', 'hostname', 'port', 48 | 'path', 'querystr', 'fragment') 49 | values = urls._uri_split(identifier) 50 | d = dict(zip(fields, values)) 51 | 52 | # querystr is a str 53 | if isinstance(d['querystr'], unicode): 54 | d['querystr'] = d['querystr'].encode('utf-8') 55 | 56 | return d 57 | 58 | 59 | class ResourceIdentifier(object): 60 | 61 | def __init__(self, identifier, query_cls=None): 62 | if not isinstance(identifier, basestring): 63 | raise TypeError("Expected str or unicode: %s", type(identifier)) 64 | 65 | self._parts = identifier_to_dict(identifier) 66 | self._identifier = unsplit(**self._parts) 67 | 68 | # NOTE: might be better to subclass instead of pass a query_cls around 69 | self.query_cls = query_cls or datastructures.MultiDict 70 | 71 | def __repr__(self): 72 | return "{0}({1!r})".format(type(self).__name__, self._identifier) 73 | 74 | def __eq__(self, other): 75 | if set(self._parts.keys()) != set(other._parts.keys()): 76 | return False 77 | return all(self._parts[k] == other._parts[k] for k in self._parts.iterkeys()) 78 | 79 | def __ne__(self, other): 80 | return not self == other 81 | 82 | def __hash__(self): 83 | return hash(self._identifier) 84 | 85 | @property 86 | def scheme(self): 87 | return self._parts['scheme'] 88 | 89 | @property 90 | def auth(self): 91 | return self._parts['auth'] 92 | 93 | @property 94 | def hostname(self): 95 | return self._parts['hostname'] 96 | 97 | @property 98 | def port(self): 99 | return self._parts['port'] 100 | 101 | @property 102 | def path(self): 103 | return self._parts['path'] 104 | 105 | @property 106 | def querystr(self): 107 | return self._parts['querystr'] 108 | 109 | @property 110 | def query(self): 111 | """Return a new instance of query_cls.""" 112 | 113 | if not hasattr(self, '_decoded_query'): 114 | self._decoded_query = list(urls._url_decode_impl( 115 | self.querystr.split('&'), 'utf-8', False, True, 'strict')) 116 | return self.query_cls(self._decoded_query) 117 | 118 | @property 119 | def fragment(self): 120 | return self._parts['fragment'] 121 | 122 | @property 123 | def netloc(self): 124 | return build_netloc(self.hostname, self.auth, self.port) 125 | 126 | def update(self, **kwargs): 127 | vals = dict(self._parts) 128 | if len(kwargs): 129 | vals.update(kwargs) 130 | 131 | return type(self)(unsplit(**vals), query_cls=self.query_cls) 132 | 133 | def update_query(self, qry): 134 | assert isinstance(qry, self.query_cls) 135 | 136 | vals = dict(self._parts) 137 | q = self.query 138 | q.update(qry) 139 | vals['querystr'] = urls.url_encode(q, encode_keys=True, charset=getattr(self, 'encoding', 'utf-8')) 140 | 141 | return type(self)(unsplit(**vals), query_cls=self.query_cls) 142 | 143 | def join(self, other): 144 | if isinstance(other, unicode): 145 | other = IRI(other) 146 | elif isinstance(other, str): 147 | other = URI(other) 148 | 149 | if not isinstance(other, type(self)): 150 | raise TypeError("Expected unicode or {0}: {1}".format( 151 | type(self).__name__, type(other).__name__)) 152 | 153 | vals = dict(self._parts) 154 | 155 | if other.scheme: 156 | if self.scheme: 157 | raise ValueError("cannot join scheme onto %ss with scheme" % 158 | self.__class__.name) 159 | vals['scheme'] = other.scheme 160 | 161 | if other.auth: 162 | if self.auth: 163 | raise ValueError("cannot join auth onto %ss with auth" % 164 | self.__class__.name) 165 | vals['auth'] = other.auth 166 | 167 | if other.hostname: 168 | if self.hostname: 169 | raise ValueError( 170 | "cannot join hostname onto %ss with hostname" % 171 | self.__class__.name) 172 | vals['hostname'] = other.hostname 173 | vals['port'] = other.port 174 | 175 | if other.path: 176 | if self.querystr or self.fragment: 177 | raise ValueError( 178 | "cannot join path onto %ss with querystr or fragment" % 179 | self.__class__.name) 180 | vals['path'] = '/'.join([self.path, other.path]).replace('//', '/') 181 | 182 | if other.querystr: 183 | if self.fragment: 184 | raise ValueError( 185 | "cannot join querystr onto %ss with fragment" % 186 | self.__class__.name) 187 | query = self.query 188 | query.update(other.query) 189 | vals['querystr'] = urls.url_encode(query, encode_keys=True, charset=getattr(self, 'encoding', 'utf-8')) 190 | 191 | if other.fragment: 192 | if self.fragment: 193 | raise ValueError( 194 | "cannot join fragment onto %ss with fragment" % 195 | self.__class__.name) 196 | vals['fragment'] = other.fragment 197 | 198 | return type(self)(unsplit(**vals), query_cls=self.query_cls) 199 | 200 | @classmethod 201 | def from_template(cls, template, **kwargs): 202 | return cls(urls.url_unquote(uri_template(template, **kwargs))) 203 | 204 | 205 | 206 | class IRI(ResourceIdentifier): 207 | 208 | def __init__(self, iri, query_cls=None): 209 | 210 | if isinstance(iri, unicode): 211 | identifier = iri 212 | elif isinstance(iri, ResourceIdentifier): 213 | identifier = unicode(iri) 214 | else: 215 | raise TypeError("iri must be unicode or IRI/URI: %s" 216 | % type(iri).__name__) 217 | 218 | super(IRI, self).__init__(identifier, query_cls) 219 | 220 | def __str__(self): 221 | return urls.iri_to_uri(self._identifier) 222 | 223 | def __unicode__(self): 224 | return self._identifier 225 | 226 | @classmethod 227 | def from_lenient(cls, maybe_gibberish): 228 | return cls(urls.url_fix(maybe_gibberish.encode('utf-8')).decode('utf-8')) 229 | 230 | 231 | class URI(ResourceIdentifier): 232 | 233 | def __init__(self, uri, encoding='utf-8', query_cls=None): 234 | 235 | if isinstance(uri, str): 236 | identifier = urls.iri_to_uri(uri.decode(encoding)) 237 | elif isinstance(uri, ResourceIdentifier): 238 | identifier = str(uri) 239 | else: 240 | raise TypeError("uri must be str or IRI/URI: %s" 241 | % type(uri).__name__) 242 | 243 | super(URI, self).__init__(identifier, query_cls) 244 | self.encoding = encoding 245 | 246 | def __str__(self): 247 | return self._identifier 248 | 249 | def __unicode__(self): 250 | return urls.uri_to_iri(self._identifier) 251 | 252 | @classmethod 253 | def from_lenient(cls, maybe_gibberish): 254 | return cls(urls.url_fix(maybe_gibberish)) 255 | 256 | @classmethod 257 | def from_template(cls, template, **kwargs): 258 | return URI(IRI(urls.url_unquote(uri_template(template, **kwargs)))) 259 | -------------------------------------------------------------------------------- /tests/test_template.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from uricore import URI, IRI 3 | from nose.tools import eq_ 4 | from uricore.template import uri_template 5 | 6 | class OrderedDict(object): 7 | 8 | def __init__(self, items): 9 | self._items = items 10 | 11 | def iteritems(self): 12 | return iter(self._items) 13 | 14 | 15 | # http://tools.ietf.org/html/rfc6570#section-3.2 16 | params = { 17 | 'count': ("one", "two", "three"), 18 | 'dom': ("example", "com"), 19 | 'dub': "me/too", 20 | 'hello': "Hello World!", 21 | 'half': "50%", 22 | 'var': "value", 23 | 'who': "fred", 24 | 'base': "http://example.com/home/", 25 | 'path': "/foo/bar", 26 | 'list': ("red", "green", "blue"), 27 | 'year': ("1965", "2000", "2012"), 28 | 'semi': ';', 29 | 'v': 6, 30 | 'x': 1024, 31 | 'y': 768, 32 | 'empty': "", 33 | 'empty_keys': [], 34 | 'undef': None, 35 | 'list': ["red", "green", "blue"], 36 | 'unicode_keys': {u'gro\xdf':u'great',}, 37 | 'numeric_keys': {1: 'hello'}, 38 | 'keys': OrderedDict([('semi', ";"), ('dot', "."), ('comma', ",")]), 39 | } 40 | 41 | def check_template(template, expansion): 42 | eq_(uri_template(template, **params), expansion) 43 | 44 | 45 | def test_composite_values(): 46 | yield check_template, "find{?year*}", "find?year=1965&year=2000&year=2012" 47 | yield check_template, "www{.dom*}", "www.example.com" 48 | 49 | 50 | def test_form_continuation_expansion(): 51 | yield check_template, "{&who}", "&who=fred" 52 | yield check_template, "{&half}", "&half=50%25" 53 | yield check_template, "?fixed=yes{&x}", "?fixed=yes&x=1024" 54 | yield check_template, "{&x,y,empty}", "&x=1024&y=768&empty=" 55 | yield check_template, "{&x,y,undef}", "&x=1024&y=768" 56 | yield check_template, "{&var:3}", "&var=val" 57 | yield check_template, "{&list}", "&list=red,green,blue" 58 | yield check_template, "{&list*}", "&list=red&list=green&list=blue" 59 | yield check_template, "{&keys}", "&keys=semi,%3B,dot,.,comma,%2C" 60 | yield check_template, "{&keys*}", "&semi=%3B&dot=.&comma=%2C" 61 | 62 | 63 | def test_form_style_expansion(): 64 | yield check_template, "{?who}", "?who=fred" 65 | yield check_template, "{?half}", "?half=50%25" 66 | yield check_template, "{?x,y}", "?x=1024&y=768" 67 | yield check_template, "{?x,y,empty}", "?x=1024&y=768&empty=" 68 | yield check_template, "{?x,y,undef}", "?x=1024&y=768" 69 | yield check_template, "{?var:3}", "?var=val" 70 | yield check_template, "{?list}", "?list=red,green,blue" 71 | yield check_template, "{?list*}", "?list=red&list=green&list=blue" 72 | yield check_template, "{?keys}", "?keys=semi,%3B,dot,.,comma,%2C" 73 | yield check_template, "{?keys*}", "?semi=%3B&dot=.&comma=%2C" 74 | 75 | 76 | def test_fragment_expansion(): 77 | yield check_template, "{#var}", "#value" 78 | yield check_template, "{#hello}", "#Hello%20World!" 79 | yield check_template, "{#half}", "#50%25" 80 | yield check_template, "foo{#empty}", "foo#" 81 | yield check_template, "foo{#undef}", "foo" 82 | yield check_template, "{#x,hello,y}", "#1024,Hello%20World!,768" 83 | yield check_template, "{#path,x}/here", "#/foo/bar,1024/here" 84 | yield check_template, "{#path:6}/here", "#/foo/b/here" 85 | yield check_template, "{#list}", "#red,green,blue" 86 | yield check_template, "{#list*}", "#red,green,blue" 87 | yield check_template, "{#keys}", "#semi,;,dot,.,comma,," 88 | yield check_template, "{#keys*}", "#semi=;,dot=.,comma=," 89 | 90 | 91 | def test_label_expansion(): 92 | yield check_template, "{.who}", ".fred" 93 | yield check_template, "{.who,who}", ".fred.fred" 94 | yield check_template, "{.half,who}", ".50%25.fred" 95 | yield check_template, "www{.dom*}", "www.example.com" 96 | yield check_template, "X{.var}", "X.value" 97 | yield check_template, "X{.empty}", "X." 98 | yield check_template, "X{.undef}", "X" 99 | yield check_template, "X{.var:3}", "X.val" 100 | yield check_template, "X{.list}", "X.red,green,blue" 101 | yield check_template, "X{.list*}", "X.red.green.blue" 102 | yield check_template, "X{.keys}", "X.semi,%3B,dot,.,comma,%2C" 103 | yield check_template, "X{.keys*}", "X.semi=%3B.dot=..comma=%2C" 104 | yield check_template, "X{.empty_keys}", "X" 105 | yield check_template, "X{.empty_keys*}", "X" 106 | 107 | 108 | def test_path_expansion(): 109 | yield check_template, "{/who}", "/fred" 110 | yield check_template, "{/who,who}", "/fred/fred" 111 | yield check_template, "{/half,who}", "/50%25/fred" 112 | yield check_template, "{/who,dub}", "/fred/me%2Ftoo" 113 | yield check_template, "{/var}", "/value" 114 | yield check_template, "{/var,empty}", "/value/" 115 | yield check_template, "{/var,undef}", "/value" 116 | yield check_template, "{/var,x}/here", "/value/1024/here" 117 | yield check_template, "{/var:1,var}", "/v/value" 118 | yield check_template, "{/list}", "/red,green,blue" 119 | yield check_template, "{/list*}", "/red/green/blue" 120 | yield check_template, "{/list*,path:4}", "/red/green/blue/%2Ffoo" 121 | yield check_template, "{/keys}", "/semi,%3B,dot,.,comma,%2C" 122 | yield check_template, "{/keys*}", "/semi=%3B/dot=./comma=%2C" 123 | 124 | 125 | def test_path_style_expansion(): 126 | yield check_template, "{;who}", ";who=fred" 127 | yield check_template, "{;half}", ";half=50%25" 128 | yield check_template, "{;empty}", ";empty" 129 | yield check_template, "{;v,empty,who}", ";v=6;empty;who=fred" 130 | yield check_template, "{;v,bar,who}", ";v=6;who=fred" 131 | yield check_template, "{;x,y}", ";x=1024;y=768" 132 | yield check_template, "{;x,y,empty}", ";x=1024;y=768;empty" 133 | yield check_template, "{;x,y,undef}", ";x=1024;y=768" 134 | yield check_template, "{;hello:5}", ";hello=Hello" 135 | yield check_template, "{;list}", ";list=red,green,blue" 136 | yield check_template, "{;list*}", ";list=red;list=green;list=blue" 137 | yield check_template, "{;keys}", ";keys=semi,%3B,dot,.,comma,%2C" 138 | yield check_template, "{;keys*}", ";semi=%3B;dot=.;comma=%2C" 139 | 140 | 141 | def test_reserved_expansion(): 142 | yield check_template, "{+var}", "value" 143 | yield check_template, "{+hello}", "Hello%20World!" 144 | yield check_template, "{+half}", "50%25" 145 | yield check_template, "{base}index", "http%3A%2F%2Fexample.com%2Fhome%2Findex" 146 | yield check_template, "{+base}index", "http://example.com/home/index" 147 | yield check_template, "O{+empty}X", "OX" 148 | yield check_template, "O{+undef}X", "OX" 149 | yield check_template, "{+path}/here", "/foo/bar/here" 150 | yield check_template, "here?ref={+path}", "here?ref=/foo/bar" 151 | yield check_template, "up{+path}{var}/here", "up/foo/barvalue/here" 152 | yield check_template, "{+x,hello,y}", "1024,Hello%20World!,768" 153 | yield check_template, "{+path,x}/here", "/foo/bar,1024/here" 154 | yield check_template, "{+path:6}/here", "/foo/b/here" 155 | yield check_template, "{+list}", "red,green,blue" 156 | yield check_template, "{+list*}", "red,green,blue" 157 | yield check_template, "{+keys}", "semi,;,dot,.,comma,," 158 | yield check_template, "{+keys*}", "semi=;,dot=.,comma=," 159 | 160 | 161 | def test_simple_string_expansion(): 162 | yield check_template, "{var}", "value" 163 | yield check_template, "{hello}", "Hello%20World%21" 164 | yield check_template, "{half}", "50%25" 165 | yield check_template, "O{empty}X", "OX" 166 | yield check_template, "O{undef}X", "OX" 167 | yield check_template, "{x,y}", "1024,768" 168 | yield check_template, "{x,hello,y}", "1024,Hello%20World%21,768" 169 | yield check_template, "?{x,empty}", "?1024," 170 | yield check_template, "?{x,undef}", "?1024" 171 | yield check_template, "?{undef,y}", "?768" 172 | yield check_template, "{var:3}", "val" 173 | yield check_template, "{var:30}", "value" 174 | yield check_template, "{list}", "red,green,blue" 175 | yield check_template, "{list*}", "red,green,blue" 176 | yield check_template, "{keys}", "semi,%3B,dot,.,comma,%2C" 177 | yield check_template, "{keys*}", "semi=%3B,dot=.,comma=%2C" 178 | 179 | 180 | def test_test_prefix_values(): 181 | yield check_template, "{var}", "value" 182 | yield check_template, "{var:20}", "value" 183 | yield check_template, "{var:3}", "val" 184 | yield check_template, "{semi}", "%3B" 185 | yield check_template, "{semi:2}", "%3B" 186 | 187 | 188 | def test_variable_expansion(): 189 | yield check_template, "{count}", "one,two,three" 190 | yield check_template, "{count*}", "one,two,three" 191 | yield check_template, "{/count}", "/one,two,three" 192 | yield check_template, "{/count*}", "/one/two/three" 193 | yield check_template, "{;count}", ";count=one,two,three" 194 | yield check_template, "{;count*}", ";count=one;count=two;count=three" 195 | yield check_template, "{?count}", "?count=one,two,three" 196 | yield check_template, "{?count*}", "?count=one&count=two&count=three" 197 | yield check_template, "{&count*}", "&count=one&count=two&count=three" 198 | 199 | 200 | def test_uri_template(): 201 | eq_(URI("http://example.com/value"), 202 | URI.from_template("http://example.com/{var}", var="value")) 203 | 204 | 205 | def test_iri_template(): 206 | eq_(IRI(u'http://\u2603/value'), 207 | IRI.from_template(u'http://\N{SNOWMAN}/{var}', var='value')) 208 | eq_(IRI(u'http://\u2603/'), 209 | IRI.from_template(u'http://{domain}/', domain=u"\N{SNOWMAN}")) 210 | 211 | def test_crazy_keys(): 212 | yield check_template, "{?unicode_keys*}", "?gro%C3%9F=great" 213 | yield check_template, "{?numeric_keys*}", "?1=hello" 214 | -------------------------------------------------------------------------------- /uricore/wkz_datastructures.py: -------------------------------------------------------------------------------- 1 | from wkz_internal import _missing 2 | 3 | class BadRequestKeyError(Exception): pass 4 | 5 | def is_immutable(self): 6 | raise TypeError('%r objects are immutable' % self.__class__.__name__) 7 | 8 | 9 | class TypeConversionDict(dict): 10 | """Works like a regular dict but the :meth:`get` method can perform 11 | type conversions. :class:`MultiDict` and :class:`CombinedMultiDict` 12 | are subclasses of this class and provide the same feature. 13 | 14 | .. versionadded:: 0.5 15 | """ 16 | 17 | def get(self, key, default=None, type=None): 18 | """Return the default value if the requested data doesn't exist. 19 | If `type` is provided and is a callable it should convert the value, 20 | return it or raise a :exc:`ValueError` if that is not possible. In 21 | this case the function will return the default as if the value was not 22 | found: 23 | 24 | >>> d = TypeConversionDict(foo='42', bar='blub') 25 | >>> d.get('foo', type=int) 26 | 42 27 | >>> d.get('bar', -1, type=int) 28 | -1 29 | 30 | :param key: The key to be looked up. 31 | :param default: The default value to be returned if the key can't 32 | be looked up. If not further specified `None` is 33 | returned. 34 | :param type: A callable that is used to cast the value in the 35 | :class:`MultiDict`. If a :exc:`ValueError` is raised 36 | by this callable the default value is returned. 37 | """ 38 | try: 39 | rv = self[key] 40 | if type is not None: 41 | rv = type(rv) 42 | except (KeyError, ValueError): 43 | rv = default 44 | return rv 45 | 46 | 47 | class MultiDict(TypeConversionDict): 48 | """A :class:`MultiDict` is a dictionary subclass customized to deal with 49 | multiple values for the same key which is for example used by the parsing 50 | functions in the wrappers. This is necessary because some HTML form 51 | elements pass multiple values for the same key. 52 | 53 | :class:`MultiDict` implements all standard dictionary methods. 54 | Internally, it saves all values for a key as a list, but the standard dict 55 | access methods will only return the first value for a key. If you want to 56 | gain access to the other values, too, you have to use the `list` methods as 57 | explained below. 58 | 59 | Basic Usage: 60 | 61 | >>> d = MultiDict([('a', 'b'), ('a', 'c')]) 62 | >>> d 63 | MultiDict([('a', 'b'), ('a', 'c')]) 64 | >>> d['a'] 65 | 'b' 66 | >>> d.getlist('a') 67 | ['b', 'c'] 68 | >>> 'a' in d 69 | True 70 | 71 | It behaves like a normal dict thus all dict functions will only return the 72 | first value when multiple values for one key are found. 73 | 74 | From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a 75 | subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will 76 | render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP 77 | exceptions. 78 | 79 | A :class:`MultiDict` can be constructed from an iterable of 80 | ``(key, value)`` tuples, a dict, a :class:`MultiDict` or from Werkzeug 0.2 81 | onwards some keyword parameters. 82 | 83 | :param mapping: the initial value for the :class:`MultiDict`. Either a 84 | regular dict, an iterable of ``(key, value)`` tuples 85 | or `None`. 86 | """ 87 | 88 | def __init__(self, mapping=None): 89 | if isinstance(mapping, MultiDict): 90 | dict.__init__(self, ((k, l[:]) for k, l in mapping.iterlists())) 91 | elif isinstance(mapping, dict): 92 | tmp = {} 93 | for key, value in mapping.iteritems(): 94 | if isinstance(value, (tuple, list)): 95 | value = list(value) 96 | else: 97 | value = [value] 98 | tmp[key] = value 99 | dict.__init__(self, tmp) 100 | else: 101 | tmp = {} 102 | for key, value in mapping or (): 103 | tmp.setdefault(key, []).append(value) 104 | dict.__init__(self, tmp) 105 | 106 | def __getstate__(self): 107 | return dict(self.lists()) 108 | 109 | def __setstate__(self, value): 110 | dict.clear(self) 111 | dict.update(self, value) 112 | 113 | def __iter__(self): 114 | return self.iterkeys() 115 | 116 | def __getitem__(self, key): 117 | """Return the first data value for this key; 118 | raises KeyError if not found. 119 | 120 | :param key: The key to be looked up. 121 | :raise KeyError: if the key does not exist. 122 | """ 123 | if key in self: 124 | return dict.__getitem__(self, key)[0] 125 | raise BadRequestKeyError(key) 126 | 127 | def __setitem__(self, key, value): 128 | """Like :meth:`add` but removes an existing key first. 129 | 130 | :param key: the key for the value. 131 | :param value: the value to set. 132 | """ 133 | dict.__setitem__(self, key, [value]) 134 | 135 | def add(self, key, value): 136 | """Adds a new value for the key. 137 | 138 | .. versionadded:: 0.6 139 | 140 | :param key: the key for the value. 141 | :param value: the value to add. 142 | """ 143 | dict.setdefault(self, key, []).append(value) 144 | 145 | def getlist(self, key, type=None): 146 | """Return the list of items for a given key. If that key is not in the 147 | `MultiDict`, the return value will be an empty list. Just as `get` 148 | `getlist` accepts a `type` parameter. All items will be converted 149 | with the callable defined there. 150 | 151 | :param key: The key to be looked up. 152 | :param type: A callable that is used to cast the value in the 153 | :class:`MultiDict`. If a :exc:`ValueError` is raised 154 | by this callable the value will be removed from the list. 155 | :return: a :class:`list` of all the values for the key. 156 | """ 157 | try: 158 | rv = dict.__getitem__(self, key) 159 | except KeyError: 160 | return [] 161 | if type is None: 162 | return list(rv) 163 | result = [] 164 | for item in rv: 165 | try: 166 | result.append(type(item)) 167 | except ValueError: 168 | pass 169 | return result 170 | 171 | def setlist(self, key, new_list): 172 | """Remove the old values for a key and add new ones. Note that the list 173 | you pass the values in will be shallow-copied before it is inserted in 174 | the dictionary. 175 | 176 | >>> d = MultiDict() 177 | >>> d.setlist('foo', ['1', '2']) 178 | >>> d['foo'] 179 | '1' 180 | >>> d.getlist('foo') 181 | ['1', '2'] 182 | 183 | :param key: The key for which the values are set. 184 | :param new_list: An iterable with the new values for the key. Old values 185 | are removed first. 186 | """ 187 | dict.__setitem__(self, key, list(new_list)) 188 | 189 | def setdefault(self, key, default=None): 190 | """Returns the value for the key if it is in the dict, otherwise it 191 | returns `default` and sets that value for `key`. 192 | 193 | :param key: The key to be looked up. 194 | :param default: The default value to be returned if the key is not 195 | in the dict. If not further specified it's `None`. 196 | """ 197 | if key not in self: 198 | self[key] = default 199 | else: 200 | default = self[key] 201 | return default 202 | 203 | def setlistdefault(self, key, default_list=None): 204 | """Like `setdefault` but sets multiple values. The list returned 205 | is not a copy, but the list that is actually used internally. This 206 | means that you can put new values into the dict by appending items 207 | to the list: 208 | 209 | >>> d = MultiDict({"foo": 1}) 210 | >>> d.setlistdefault("foo").extend([2, 3]) 211 | >>> d.getlist("foo") 212 | [1, 2, 3] 213 | 214 | :param key: The key to be looked up. 215 | :param default: An iterable of default values. It is either copied 216 | (in case it was a list) or converted into a list 217 | before returned. 218 | :return: a :class:`list` 219 | """ 220 | if key not in self: 221 | default_list = list(default_list or ()) 222 | dict.__setitem__(self, key, default_list) 223 | else: 224 | default_list = dict.__getitem__(self, key) 225 | return default_list 226 | 227 | def items(self, multi=False): 228 | """Return a list of ``(key, value)`` pairs. 229 | 230 | :param multi: If set to `True` the list returned will have a 231 | pair for each value of each key. Otherwise it 232 | will only contain pairs for the first value of 233 | each key. 234 | 235 | :return: a :class:`list` 236 | """ 237 | return list(self.iteritems(multi)) 238 | 239 | def lists(self): 240 | """Return a list of ``(key, values)`` pairs, where values is the list of 241 | all values associated with the key. 242 | 243 | :return: a :class:`list` 244 | """ 245 | return list(self.iterlists()) 246 | 247 | def values(self): 248 | """Returns a list of the first value on every key's value list. 249 | 250 | :return: a :class:`list`. 251 | """ 252 | return [self[key] for key in self.iterkeys()] 253 | 254 | def listvalues(self): 255 | """Return a list of all values associated with a key. Zipping 256 | :meth:`keys` and this is the same as calling :meth:`lists`: 257 | 258 | >>> d = MultiDict({"foo": [1, 2, 3]}) 259 | >>> zip(d.keys(), d.listvalues()) == d.lists() 260 | True 261 | 262 | :return: a :class:`list` 263 | """ 264 | return list(self.iterlistvalues()) 265 | 266 | def iteritems(self, multi=False): 267 | """Like :meth:`items` but returns an iterator.""" 268 | for key, values in dict.iteritems(self): 269 | if multi: 270 | for value in values: 271 | yield key, value 272 | else: 273 | yield key, values[0] 274 | 275 | def iterlists(self): 276 | """Like :meth:`items` but returns an iterator.""" 277 | for key, values in dict.iteritems(self): 278 | yield key, list(values) 279 | 280 | def itervalues(self): 281 | """Like :meth:`values` but returns an iterator.""" 282 | for values in dict.itervalues(self): 283 | yield values[0] 284 | 285 | def iterlistvalues(self): 286 | """Like :meth:`listvalues` but returns an iterator.""" 287 | return dict.itervalues(self) 288 | 289 | def copy(self): 290 | """Return a shallow copy of this object.""" 291 | return self.__class__(self) 292 | 293 | def to_dict(self, flat=True): 294 | """Return the contents as regular dict. If `flat` is `True` the 295 | returned dict will only have the first item present, if `flat` is 296 | `False` all values will be returned as lists. 297 | 298 | :param flat: If set to `False` the dict returned will have lists 299 | with all the values in it. Otherwise it will only 300 | contain the first value for each key. 301 | :return: a :class:`dict` 302 | """ 303 | if flat: 304 | return dict(self.iteritems()) 305 | return dict(self.lists()) 306 | 307 | def update(self, other_dict): 308 | """update() extends rather than replaces existing key lists.""" 309 | for key, value in iter_multi_items(other_dict): 310 | MultiDict.add(self, key, value) 311 | 312 | def pop(self, key, default=_missing): 313 | """Pop the first item for a list on the dict. Afterwards the 314 | key is removed from the dict, so additional values are discarded: 315 | 316 | >>> d = MultiDict({"foo": [1, 2, 3]}) 317 | >>> d.pop("foo") 318 | 1 319 | >>> "foo" in d 320 | False 321 | 322 | :param key: the key to pop. 323 | :param default: if provided the value to return if the key was 324 | not in the dictionary. 325 | """ 326 | try: 327 | return dict.pop(self, key)[0] 328 | except KeyError, e: 329 | if default is not _missing: 330 | return default 331 | raise BadRequestKeyError(str(e)) 332 | 333 | def popitem(self): 334 | """Pop an item from the dict.""" 335 | try: 336 | item = dict.popitem(self) 337 | return (item[0], item[1][0]) 338 | except KeyError, e: 339 | raise BadRequestKeyError(str(e)) 340 | 341 | def poplist(self, key): 342 | """Pop the list for a key from the dict. If the key is not in the dict 343 | an empty list is returned. 344 | 345 | .. versionchanged:: 0.5 346 | If the key does no longer exist a list is returned instead of 347 | raising an error. 348 | """ 349 | return dict.pop(self, key, []) 350 | 351 | def popitemlist(self): 352 | """Pop a ``(key, list)`` tuple from the dict.""" 353 | try: 354 | return dict.popitem(self) 355 | except KeyError, e: 356 | raise BadRequestKeyError(str(e)) 357 | 358 | def __copy__(self): 359 | return self.copy() 360 | 361 | def __repr__(self): 362 | return '%s(%r)' % (self.__class__.__name__, self.items(multi=True)) 363 | 364 | 365 | def iter_multi_items(mapping): 366 | """Iterates over the items of a mapping yielding keys and values 367 | without dropping any from more complex structures. 368 | """ 369 | if isinstance(mapping, MultiDict): 370 | for item in mapping.iteritems(multi=True): 371 | yield item 372 | elif isinstance(mapping, dict): 373 | for key, value in mapping.iteritems(): 374 | if isinstance(value, (tuple, list)): 375 | for value in value: 376 | yield key, value 377 | else: 378 | yield key, value 379 | else: 380 | for item in mapping: 381 | yield item 382 | 383 | 384 | class ImmutableDictMixin(object): 385 | """Makes a :class:`dict` immutable. 386 | 387 | .. versionadded:: 0.5 388 | 389 | :private: 390 | """ 391 | _hash_cache = None 392 | 393 | @classmethod 394 | def fromkeys(cls, keys, value=None): 395 | instance = super(cls, cls).__new__(cls) 396 | instance.__init__(zip(keys, repeat(value))) 397 | return instance 398 | 399 | def __reduce_ex__(self, protocol): 400 | return type(self), (dict(self),) 401 | 402 | def _iter_hashitems(self): 403 | return self.iteritems() 404 | 405 | def __hash__(self): 406 | if self._hash_cache is not None: 407 | return self._hash_cache 408 | rv = self._hash_cache = hash(frozenset(self._iter_hashitems())) 409 | return rv 410 | 411 | def setdefault(self, key, default=None): 412 | is_immutable(self) 413 | 414 | def update(self, *args, **kwargs): 415 | is_immutable(self) 416 | 417 | def pop(self, key, default=None): 418 | is_immutable(self) 419 | 420 | def popitem(self): 421 | is_immutable(self) 422 | 423 | def __setitem__(self, key, value): 424 | is_immutable(self) 425 | 426 | def __delitem__(self, key): 427 | is_immutable(self) 428 | 429 | def clear(self): 430 | is_immutable(self) 431 | 432 | 433 | class ImmutableMultiDictMixin(ImmutableDictMixin): 434 | """Makes a :class:`MultiDict` immutable. 435 | 436 | .. versionadded:: 0.5 437 | 438 | :private: 439 | """ 440 | 441 | def __reduce_ex__(self, protocol): 442 | return type(self), (self.items(multi=True),) 443 | 444 | def _iter_hashitems(self): 445 | return self.iteritems(multi=True) 446 | 447 | def add(self, key, value): 448 | is_immutable(self) 449 | 450 | def popitemlist(self): 451 | is_immutable(self) 452 | 453 | def poplist(self, key): 454 | is_immutable(self) 455 | 456 | def setlist(self, key, new_list): 457 | is_immutable(self) 458 | 459 | def setlistdefault(self, key, default_list=None): 460 | is_immutable(self) 461 | 462 | 463 | class ImmutableMultiDict(ImmutableMultiDictMixin, MultiDict): 464 | """An immutable :class:`MultiDict`. 465 | 466 | .. versionadded:: 0.5 467 | """ 468 | 469 | def copy(self): 470 | """Return a shallow mutable copy of this object. Keep in mind that 471 | the standard library's :func:`copy` function is a no-op for this class 472 | like for any other python immutable type (eg: :class:`tuple`). 473 | """ 474 | return MultiDict(self) 475 | 476 | def __copy__(self): 477 | return self 478 | -------------------------------------------------------------------------------- /uricore/wkz_urls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | werkzeug.urls 4 | ~~~~~~~~~~~~~ 5 | 6 | This module implements various URL related functions. 7 | 8 | :copyright: (c) 2011 by the Werkzeug Team, see AUTHORS for more details. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | try: 12 | import urlparse 13 | except ImportError: 14 | import urllib.parse as urlparse 15 | 16 | from wkz_internal import _decode_unicode 17 | from wkz_datastructures import MultiDict, iter_multi_items 18 | from wkz_wsgi import make_chunk_iter 19 | 20 | 21 | #: list of characters that are always safe in URLs. 22 | _always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 23 | 'abcdefghijklmnopqrstuvwxyz' 24 | '0123456789_.-') 25 | _safe_map = dict((c, c) for c in _always_safe) 26 | for i in xrange(0x80): 27 | c = chr(i) 28 | if c not in _safe_map: 29 | _safe_map[c] = '%%%02X' % i 30 | _safe_map.update((chr(i), '%%%02X' % i) for i in xrange(0x80, 0x100)) 31 | _safemaps = {} 32 | 33 | #: lookup table for encoded characters. 34 | _hexdig = '0123456789ABCDEFabcdef' 35 | _hextochr = dict((a + b, chr(int(a + b, 16))) 36 | for a in _hexdig for b in _hexdig) 37 | 38 | 39 | def _quote(s, safe='/', _join=''.join): 40 | assert isinstance(s, str), 'quote only works on bytes' 41 | if not s or not s.rstrip(_always_safe + safe): 42 | return s 43 | try: 44 | quoter = _safemaps[safe] 45 | except KeyError: 46 | safe_map = _safe_map.copy() 47 | safe_map.update([(c, c) for c in safe]) 48 | _safemaps[safe] = quoter = safe_map.__getitem__ 49 | return _join(map(quoter, s)) 50 | 51 | 52 | def _quote_plus(s, safe=''): 53 | if ' ' in s: 54 | return _quote(s, safe + ' ').replace(' ', '+') 55 | return _quote(s, safe) 56 | 57 | 58 | def _safe_urlsplit(s): 59 | """the urlparse.urlsplit cache breaks if it contains unicode and 60 | we cannot control that. So we force type cast that thing back 61 | to what we think it is. 62 | """ 63 | rv = urlparse.urlsplit(s) 64 | # we have to check rv[2] here and not rv[1] as rv[1] will be 65 | # an empty bytestring in case no domain was given. 66 | if type(rv[2]) is not type(s): 67 | assert hasattr(urlparse, 'clear_cache') 68 | urlparse.clear_cache() 69 | rv = urlparse.urlsplit(s) 70 | assert type(rv[2]) is type(s) 71 | return rv 72 | 73 | 74 | def _unquote(s, unsafe=''): 75 | assert isinstance(s, str), 'unquote only works on bytes' 76 | rv = s.split('%') 77 | if len(rv) == 1: 78 | return s 79 | s = rv[0] 80 | for item in rv[1:]: 81 | try: 82 | char = _hextochr[item[:2]] 83 | if char in unsafe: 84 | raise KeyError() 85 | s += char + item[2:] 86 | except KeyError: 87 | s += '%' + item 88 | return s 89 | 90 | 91 | def _unquote_plus(s): 92 | return _unquote(s.replace('+', ' ')) 93 | 94 | 95 | def _uri_split(uri): 96 | """Splits up an URI or IRI.""" 97 | scheme, netloc, path, query, fragment = _safe_urlsplit(uri) 98 | 99 | auth = None 100 | port = None 101 | 102 | if '@' in netloc: 103 | auth, netloc = netloc.split('@', 1) 104 | 105 | if netloc.startswith('['): 106 | host, port_part = netloc[1:].split(']', 1) 107 | if port_part.startswith(':'): 108 | port = port_part[1:] 109 | elif ':' in netloc: 110 | host, port = netloc.split(':', 1) 111 | else: 112 | host = netloc 113 | return scheme, auth, host, port, path, query, fragment 114 | 115 | 116 | def iri_to_uri(iri, charset='utf-8'): 117 | r"""Converts any unicode based IRI to an acceptable ASCII URI. Werkzeug 118 | always uses utf-8 URLs internally because this is what browsers and HTTP 119 | do as well. In some places where it accepts an URL it also accepts a 120 | unicode IRI and converts it into a URI. 121 | 122 | Examples for IRI versus URI: 123 | 124 | >>> iri_to_uri(u'http://☃.net/') 125 | 'http://xn--n3h.net/' 126 | >>> iri_to_uri(u'http://üser:pässword@☃.net/påth') 127 | 'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th' 128 | 129 | .. versionadded:: 0.6 130 | 131 | :param iri: the iri to convert 132 | :param charset: the charset for the URI 133 | """ 134 | iri = unicode(iri) 135 | scheme, auth, hostname, port, path, query, fragment = _uri_split(iri) 136 | 137 | scheme = scheme.encode('ascii') 138 | hostname = hostname.encode('idna') 139 | 140 | if ':' in hostname: 141 | hostname = '[' + hostname + ']' 142 | 143 | if auth: 144 | if ':' in auth: 145 | auth, password = auth.split(':', 1) 146 | else: 147 | password = None 148 | auth = _quote(auth.encode(charset)) 149 | if password: 150 | auth += ':' + _quote(password.encode(charset)) 151 | hostname = auth + '@' + hostname 152 | if port: 153 | hostname += ':' + port 154 | 155 | path = _quote(path.encode(charset), safe="/:~+%") 156 | query = _quote(query.encode(charset), safe="=%&[]:;$()+,!?*/") 157 | 158 | # this absolutely always must return a string. Otherwise some parts of 159 | # the system might perform double quoting (#61) 160 | return str(urlparse.urlunsplit([scheme, hostname, path, query, fragment])) 161 | 162 | 163 | def uri_to_iri(uri, charset='utf-8', errors='replace'): 164 | r"""Converts a URI in a given charset to a IRI. 165 | 166 | Examples for URI versus IRI 167 | 168 | >>> uri_to_iri('http://xn--n3h.net/') 169 | u'http://\u2603.net/' 170 | >>> uri_to_iri('http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th') 171 | u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th' 172 | 173 | Query strings are left unchanged: 174 | 175 | >>> uri_to_iri('/?foo=24&x=%26%2f') 176 | u'/?foo=24&x=%26%2f' 177 | 178 | .. versionadded:: 0.6 179 | 180 | :param uri: the URI to convert 181 | :param charset: the charset of the URI 182 | :param errors: the error handling on decode 183 | """ 184 | uri = url_fix(str(uri), charset) 185 | scheme, auth, hostname, port, path, query, fragment = _uri_split(uri) 186 | 187 | scheme = _decode_unicode(scheme, 'ascii', errors) 188 | 189 | try: 190 | hostname = hostname.decode('idna') 191 | except UnicodeError: 192 | # dammit, that codec raised an error. Because it does not support 193 | # any error handling we have to fake it.... badly 194 | if errors not in ('ignore', 'replace'): 195 | raise 196 | hostname = hostname.decode('ascii', errors) 197 | 198 | if ':' in hostname: 199 | hostname = '[' + hostname + ']' 200 | 201 | if auth: 202 | if ':' in auth: 203 | auth, password = auth.split(':', 1) 204 | else: 205 | password = None 206 | auth = _decode_unicode(_unquote(auth), charset, errors) 207 | if password: 208 | auth += u':' + _decode_unicode(_unquote(password), 209 | charset, errors) 210 | hostname = auth + u'@' + hostname 211 | if port: 212 | # port should be numeric, but you never know... 213 | hostname += u':' + port.decode(charset, errors) 214 | 215 | path = _decode_unicode(_unquote(path, '/;?'), charset, errors) 216 | query = _decode_unicode(_unquote(query, ';/?:@&=+,$'), 217 | charset, errors) 218 | 219 | return urlparse.urlunsplit([scheme, hostname, path, query, fragment]) 220 | 221 | 222 | def url_decode(s, charset='utf-8', decode_keys=False, include_empty=True, 223 | errors='replace', separator='&', cls=None): 224 | """Parse a querystring and return it as :class:`MultiDict`. Per default 225 | only values are decoded into unicode strings. If `decode_keys` is set to 226 | `True` the same will happen for keys. 227 | 228 | Per default a missing value for a key will default to an empty key. If 229 | you don't want that behavior you can set `include_empty` to `False`. 230 | 231 | Per default encoding errors are ignored. If you want a different behavior 232 | you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a 233 | `HTTPUnicodeError` is raised. 234 | 235 | .. versionchanged:: 0.5 236 | In previous versions ";" and "&" could be used for url decoding. 237 | This changed in 0.5 where only "&" is supported. If you want to 238 | use ";" instead a different `separator` can be provided. 239 | 240 | The `cls` parameter was added. 241 | 242 | :param s: a string with the query string to decode. 243 | :param charset: the charset of the query string. 244 | :param decode_keys: set to `True` if you want the keys to be decoded 245 | as well. 246 | :param include_empty: Set to `False` if you don't want empty values to 247 | appear in the dict. 248 | :param errors: the decoding error behavior. 249 | :param separator: the pair separator to be used, defaults to ``&`` 250 | :param cls: an optional dict class to use. If this is not specified 251 | or `None` the default :class:`MultiDict` is used. 252 | """ 253 | if cls is None: 254 | cls = MultiDict 255 | return cls(_url_decode_impl(str(s).split(separator), charset, decode_keys, 256 | include_empty, errors)) 257 | 258 | 259 | def url_decode_stream(stream, charset='utf-8', decode_keys=False, 260 | include_empty=True, errors='replace', separator='&', 261 | cls=None, limit=None, return_iterator=False): 262 | """Works like :func:`url_decode` but decodes a stream. The behavior 263 | of stream and limit follows functions like 264 | :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is 265 | directly fed to the `cls` so you can consume the data while it's 266 | parsed. 267 | 268 | .. versionadded:: 0.8 269 | 270 | :param stream: a stream with the encoded querystring 271 | :param charset: the charset of the query string. 272 | :param decode_keys: set to `True` if you want the keys to be decoded 273 | as well. 274 | :param include_empty: Set to `False` if you don't want empty values to 275 | appear in the dict. 276 | :param errors: the decoding error behavior. 277 | :param separator: the pair separator to be used, defaults to ``&`` 278 | :param cls: an optional dict class to use. If this is not specified 279 | or `None` the default :class:`MultiDict` is used. 280 | :param limit: the content length of the URL data. Not necessary if 281 | a limited stream is provided. 282 | :param return_iterator: if set to `True` the `cls` argument is ignored 283 | and an iterator over all decoded pairs is 284 | returned 285 | """ 286 | if return_iterator: 287 | cls = lambda x: x 288 | elif cls is None: 289 | cls = MultiDict 290 | pair_iter = make_chunk_iter(stream, separator, limit) 291 | return cls(_url_decode_impl(pair_iter, charset, decode_keys, 292 | include_empty, errors)) 293 | 294 | 295 | def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, 296 | errors): 297 | for pair in pair_iter: 298 | if not pair: 299 | continue 300 | if '=' in pair: 301 | key, value = pair.split('=', 1) 302 | else: 303 | if not include_empty: 304 | continue 305 | key = pair 306 | value = '' 307 | key = _unquote_plus(key) 308 | if decode_keys: 309 | key = _decode_unicode(key, charset, errors) 310 | yield key, url_unquote_plus(value, charset, errors) 311 | 312 | 313 | def url_encode(obj, charset='utf-8', encode_keys=False, sort=False, key=None, 314 | separator='&'): 315 | """URL encode a dict/`MultiDict`. If a value is `None` it will not appear 316 | in the result string. Per default only values are encoded into the target 317 | charset strings. If `encode_keys` is set to ``True`` unicode keys are 318 | supported too. 319 | 320 | If `sort` is set to `True` the items are sorted by `key` or the default 321 | sorting algorithm. 322 | 323 | .. versionadded:: 0.5 324 | `sort`, `key`, and `separator` were added. 325 | 326 | :param obj: the object to encode into a query string. 327 | :param charset: the charset of the query string. 328 | :param encode_keys: set to `True` if you have unicode keys. 329 | :param sort: set to `True` if you want parameters to be sorted by `key`. 330 | :param separator: the separator to be used for the pairs. 331 | :param key: an optional function to be used for sorting. For more details 332 | check out the :func:`sorted` documentation. 333 | """ 334 | return separator.join(_url_encode_impl(obj, charset, encode_keys, sort, key)) 335 | 336 | 337 | def url_encode_stream(obj, stream=None, charset='utf-8', encode_keys=False, 338 | sort=False, key=None, separator='&'): 339 | """Like :meth:`url_encode` but writes the results to a stream 340 | object. If the stream is `None` a generator over all encoded 341 | pairs is returned. 342 | 343 | .. versionadded:: 0.8 344 | 345 | :param obj: the object to encode into a query string. 346 | :param stream: a stream to write the encoded object into or `None` if 347 | an iterator over the encoded pairs should be returned. In 348 | that case the separator argument is ignored. 349 | :param charset: the charset of the query string. 350 | :param encode_keys: set to `True` if you have unicode keys. 351 | :param sort: set to `True` if you want parameters to be sorted by `key`. 352 | :param separator: the separator to be used for the pairs. 353 | :param key: an optional function to be used for sorting. For more details 354 | check out the :func:`sorted` documentation. 355 | """ 356 | gen = _url_encode_impl(obj, charset, encode_keys, sort, key) 357 | if stream is None: 358 | return gen 359 | for idx, chunk in enumerate(gen): 360 | if idx: 361 | stream.write(separator) 362 | stream.write(chunk) 363 | 364 | 365 | def _url_encode_impl(obj, charset, encode_keys, sort, key): 366 | iterable = iter_multi_items(obj) 367 | if sort: 368 | iterable = sorted(iterable, key=key) 369 | for key, value in iterable: 370 | if value is None: 371 | continue 372 | if encode_keys and isinstance(key, unicode): 373 | key = key.encode(charset) 374 | else: 375 | key = str(key) 376 | if isinstance(value, unicode): 377 | value = value.encode(charset) 378 | else: 379 | value = str(value) 380 | yield '%s=%s' % (_quote(key), _quote_plus(value)) 381 | 382 | 383 | def url_quote(s, charset='utf-8', safe='/:'): 384 | """URL encode a single string with a given encoding. 385 | 386 | :param s: the string to quote. 387 | :param charset: the charset to be used. 388 | :param safe: an optional sequence of safe characters. 389 | """ 390 | if isinstance(s, unicode): 391 | s = s.encode(charset) 392 | elif not isinstance(s, str): 393 | s = str(s) 394 | return _quote(s, safe=safe) 395 | 396 | 397 | def url_quote_plus(s, charset='utf-8', safe=''): 398 | """URL encode a single string with the given encoding and convert 399 | whitespace to "+". 400 | 401 | :param s: the string to quote. 402 | :param charset: the charset to be used. 403 | :param safe: an optional sequence of safe characters. 404 | """ 405 | if isinstance(s, unicode): 406 | s = s.encode(charset) 407 | elif not isinstance(s, str): 408 | s = str(s) 409 | return _quote_plus(s, safe=safe) 410 | 411 | 412 | def url_unquote(s, charset='utf-8', errors='replace'): 413 | """URL decode a single string with a given decoding. 414 | 415 | Per default encoding errors are ignored. If you want a different behavior 416 | you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a 417 | `HTTPUnicodeError` is raised. 418 | 419 | :param s: the string to unquote. 420 | :param charset: the charset to be used. 421 | :param errors: the error handling for the charset decoding. 422 | """ 423 | if isinstance(s, unicode): 424 | s = s.encode(charset) 425 | return _decode_unicode(_unquote(s), charset, errors) 426 | 427 | 428 | def url_unquote_plus(s, charset='utf-8', errors='replace'): 429 | """URL decode a single string with the given decoding and decode 430 | a "+" to whitespace. 431 | 432 | Per default encoding errors are ignored. If you want a different behavior 433 | you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a 434 | `HTTPUnicodeError` is raised. 435 | 436 | :param s: the string to unquote. 437 | :param charset: the charset to be used. 438 | :param errors: the error handling for the charset decoding. 439 | """ 440 | if isinstance(s, unicode): 441 | s = s.encode(charset) 442 | return _decode_unicode(_unquote_plus(s), charset, errors) 443 | 444 | 445 | def url_fix(s, charset='utf-8'): 446 | r"""Sometimes you get an URL by a user that just isn't a real URL because 447 | it contains unsafe characters like ' ' and so on. This function can fix 448 | some of the problems in a similar way browsers handle data entered by the 449 | user: 450 | 451 | >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)') 452 | 'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29' 453 | 454 | :param s: the string with the URL to fix. 455 | :param charset: The target charset for the URL if the url was given as 456 | unicode string. 457 | """ 458 | if isinstance(s, unicode): 459 | s = s.encode(charset, 'replace') 460 | scheme, netloc, path, qs, anchor = _safe_urlsplit(s) 461 | path = _quote(path, '/%') 462 | qs = _quote_plus(qs, ':&%=') 463 | return urlparse.urlunsplit((scheme, netloc, path, qs, anchor)) 464 | --------------------------------------------------------------------------------