├── .coveragerc
├── .gitignore
├── .pre-commit-config.yaml
├── .travis.yml
├── Makefile
├── README.md
├── UNLICENSE
├── bin
└── generate-tlds
├── pylintrc
├── requirements.txt
├── requirements_dev.txt
├── setup.py
├── tests
├── __init__.py
├── _urlparse_less_special_test.py
├── doc_test.py
├── encoding_test.py
├── search_test.py
└── urllib_utf8_test.py
├── tox.ini
└── yelp_uri
├── __init__.py
├── _urlparse_less_special.py
├── encoding.py
├── search.py
├── tlds
├── __init__.py
├── all.py
└── common.py
└── urllib_utf8.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | timid = True
4 | source = .
5 | omit =
6 | .tox/*
7 | /usr/*
8 | setup.py
9 | venv/*
10 |
11 | [report]
12 | exclude_lines =
13 | # Have to re-enable the standard pragma
14 | \#\s*pragma: no cover
15 |
16 | # Don't complain if tests don't hit defensive assertion code:
17 | ^\s*raise AssertionError\b
18 | ^\s*raise NotImplementedError\b
19 | ^\s*return NotImplemented\b
20 | ^\s*raise$
21 |
22 | # Don't complain if non-runnable code isn't run:
23 | ^if __name__ == ['"]__main__['"]:$
24 |
25 | [html]
26 | directory = coverage-html
27 |
28 | # vim:ft=dosini
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.iml
3 | *.py[co]
4 | .*.sw[a-z]
5 | .coverage
6 | .idea
7 | .pre-commit-files
8 | .project
9 | .pydevproject
10 | .tox
11 | .venv.touch
12 | /venv*
13 | coverage-html
14 | dist
15 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.4.0
4 | hooks:
5 | - id: trailing-whitespace
6 | files: \.(py|sh|yaml)$
7 | - id: end-of-file-fixer
8 | files: \.(py|sh|yaml)$
9 | - id: check-yaml
10 | files: \.(yaml|yml)$
11 | - id: debug-statements
12 | files: \.py$
13 | - id: name-tests-test
14 | files: tests/.+\.py$
15 | - id: fix-encoding-pragma
16 | args:
17 | - --remove
18 | language_version: python3.8
19 | -   repo: https://github.com/asottile/reorder_python_imports
20 | rev: v3.10.0
21 | hooks:
22 | - id: reorder-python-imports
23 | -   repo: https://github.com/asottile/pyupgrade
24 | rev: v3.10.1
25 | hooks:
26 | - id: pyupgrade
27 | args: ['--py38-plus']
28 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | matrix:
3 | include:
4 | - env: TOXENV=py27
5 | - env: TOXENV=py36
6 | python: 3.6
7 | install: pip install tox
8 | script: tox
9 | cache:
10 | directories:
11 | - $HOME/.cache/pip
12 | - $HOME/.cache/pre-commit
13 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | export PATH := $(PWD)/bin:$(PWD)/venv/bin:$(PATH)
2 |
3 | REBUILD_FLAG =
4 |
5 | .PHONY: all
6 | all: venv test
7 |
8 | venv: .venv.touch
9 | rm -rf venv
10 | virtualenv venv --python python3.8
11 | pip install -r requirements_dev.txt
12 |
13 | .PHONY: tests test
14 | tests: test
15 | test: venv
16 | tox $(REBUILD_FLAG)
17 |
18 |
19 | .venv.touch: setup.py requirements.txt requirements_dev.txt
20 | $(eval REBUILD_FLAG := --recreate)
21 | touch .venv.touch
22 |
23 |
24 | .PHONY: clean
25 | clean:
26 | find . -iname '*.pyc' | xargs rm -f
27 | rm -rf .tox
28 | rm -rf ./venv
29 | rm -f .venv.touch
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yelp\_uri
2 |
3 | [![Build Status](https://travis-ci.org/Yelp/yelp_uri.svg?branch=master)](https://travis-ci.org/Yelp/yelp_uri)
4 |
5 |
6 | ## Installation
7 |
8 | For a primer on pip and virtualenv, see the [Python Packaging User Guide](https://packaging.python.org/en/latest/tutorials/installing-packages/).
9 |
10 | TL;DR: `pip install yelp_uri`
11 |
12 |
13 | ## Usage
14 |
15 | Make a well-encoded URI from user input.
16 |
17 | ```python
18 | >>> weird_uri = 'http://münch.com/münch?one=m%C3%BCnch#m%FCnch'
19 |
20 | >>> import yelp_uri.encoding as E
21 | >>> well_encoded = E.recode_uri(weird_uri)
22 | >>> print(well_encoded)
23 | http://xn--mnch-0ra.com/m%C3%BCnch?one=m%C3%BCnch#m%C3%BCnch
24 |
25 | ```
26 |
27 | Make a user-readable url, from either a well-encoded url or user input:
28 |
29 | ```python
30 | >>> print(E.decode_uri(well_encoded))
31 | http://münch.com/münch?one=münch#münch
32 | >>> print(E.decode_uri(weird_uri))
33 | http://münch.com/münch?one=münch#münch
34 |
35 | ```
36 |
37 |
38 |
39 | `yelp_uri.search` has regexes for finding URLs in user-generated plaintext.
40 |
41 | ```python
42 | >>> plaintext = '''
43 | ... Reference: http://en.wikipedia.org/wiki/Eon_(geology)
44 | ... Follow @YelpCincy on Twitter (http://twitter.com/YelpCincy)
45 | ... '''
46 | >>> from yelp_uri.search import url_regex
47 | >>> for url in url_regex.finditer(plaintext): print(url.group())
48 | http://en.wikipedia.org/wiki/Eon_(geology)
49 | http://twitter.com/YelpCincy
50 |
51 | ```
52 |
--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/bin/generate-tlds:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
This reads the IANA-maintained list of tlds and formats/outputs them for use
in the domains regular expression. To regenerate:
    ./bin/generate-tlds > yelp_uri/tlds/all.py
"""
import sys
import urllib.error
import urllib.request


IANA_TLDS_URL = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'


def main(url=IANA_TLDS_URL, _urlopen=urllib.request.urlopen):
    """Fetch the IANA TLD list and print a Python module defining `all_tlds`.

    :param url: location of the IANA tlds-alpha-by-domain.txt file.
    :param _urlopen: URL-opening callable; overridable for tests.
    :return: None on success; on download failure, the URLError reason
        (truthy, so it doubles as a nonzero exit status for sys.exit).
    """
    try:
        domain_data = _urlopen(url)
    except urllib.error.URLError as e:
        # Note the trailing space: these two literals are concatenated.
        print(
            "Could not get the domains from the given URL. Perhaps the IANA "
            "has changed the location of the file or it no longer exists."
        )
        return e.reason

    # The IANA file is one TLD per line; lines starting with '#' are
    # comments and blank lines are noise.  urlopen yields bytes, so the
    # comment marker must be a bytes literal too.
    data = (
        line.lower()
        for line in domain_data.read().splitlines()
        if line.strip() and not line.startswith(b"#")
    )

    tlds = set()
    for datum in data:
        # Keep both the punycoded and the decoded (unicode) versions so the
        # regex matches either spelling of an internationalized TLD.
        tlds.add(datum.decode('utf-8'))
        tlds.add(datum.decode('idna'))

    domains_string = "',\n    '".join(sorted(tlds))

    # Emitted as plain py3 source: no coding pragma or __future__ import --
    # this repo's own pre-commit hooks (fix-encoding-pragma --remove,
    # pyupgrade --py38-plus) would strip them anyway.
    print('''\
# Generated automatically. To regenerate:
#     ./bin/generate-tlds > yelp_uri/tlds/all.py
all_tlds = '|'.join((
    '{}',
))'''.format(domains_string))


if __name__ == "__main__":
    sys.exit(main())
--------------------------------------------------------------------------------
/pylintrc:
--------------------------------------------------------------------------------
1 | [MESSAGES CONTROL]
2 | disable=
3 | locally-disabled,
4 | missing-docstring,
5 | maybe-no-member,
6 | redefined-variable-type,
7 | redundant-keyword-arg,
8 | too-many-function-args,
9 |
10 |
11 | [REPORTS]
12 | output-format=colorized
13 | reports=no
14 |
15 | [BASIC]
16 | #const-rgx=(([A-Za-z_][A-Za-z0-9_]*)|(__.*__))$
17 | const-rgx=(([A-Za-z_][A-Za-z0-9_]*)|(__.*__))$
18 |
19 | #function-rgx=[a-z_][a-z0-9_]{2,30}$
20 | function-rgx=[a-z_][a-z0-9_]{2,60}$
21 |
22 | #method-rgx=[a-z_][a-z0-9_]{2,30}$
23 | method-rgx=(%(function-rgx)s|%(const-rgx)s)
24 |
25 | #variable-rgx=[a-z_][a-z0-9_]{2,30}$
26 | variable-rgx=[a-z_][a-z0-9_]{0,30}$
27 |
28 | [FORMAT]
29 | max-line-length=131
30 |
31 | [TYPECHECK]
32 | ignored-classes=
33 | pytest,
34 | RFC3986,
35 | _MovedItems,
36 |
37 | [DESIGN]
38 | min-public-methods=0
39 |
40 | # vim:ft=dosini:
41 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | .
2 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | coverage
2 | flake8
3 | mock
4 | pytest
5 | pre-commit
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages
2 | from setuptools import setup
3 |
4 |
5 | def main():
6 | setup(
7 | name='yelp_uri',
8 | version='2.0.1',
9 | description="Uri utilities maintained by Yelp",
10 | url='https://github.com/Yelp/yelp_uri',
11 | author='Buck Golemon',
12 | author_email='buck@yelp.com',
13 | platforms='all',
14 | classifiers=[
15 | 'License :: Public Domain',
16 | 'Programming Language :: Python :: 3.8',
17 | ],
18 | packages=find_packages(exclude=('tests*',)),
19 | install_requires=[
20 | 'yelp_encodings',
21 | 'yelp_bytes'
22 | ],
23 | options={
24 | 'bdist_wheel': {
25 | 'universal': 1,
26 | }
27 | },
28 | )
29 |
30 |
31 | if __name__ == '__main__':
32 | exit(main())
33 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yelp/yelp_uri/8688042e6579bc235e8b2ddd2b552c5be84ba674/tests/__init__.py
--------------------------------------------------------------------------------
/tests/_urlparse_less_special_test.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | """Tests are also pulled from stdlib 2.6
3 | This file is space-indented to ease merging from upstream.
4 |
5 | http://hg.python.org/cpython/raw-file/4a17784f2fee/Lib/test/test_urlparse.py
6 | """
7 | import unittest
8 |
9 | import yelp_uri._urlparse_less_special as urlparse
10 |
11 |
12 | RFC1808_BASE = "http://a/b/c/d;p?q#f"
13 | RFC2396_BASE = "http://a/b/c/d;p?q"
14 | RFC3986_BASE = 'http://a/b/c/d;p?q'
15 | SIMPLE_BASE = 'http://a/b/c/d'
16 |
17 | # A list of test cases.  Each test case is a two-tuple that contains
18 | # a string with the query and a dictionary with the expected result.
19 |
20 | parse_qsl_test_cases = [
21 | ("", []),
22 | ("&", []),
23 | ("&&", []),
24 | ("=", [('', '')]),
25 | ("=a", [('', 'a')]),
26 | ("a", [('a', '')]),
27 | ("a=", [('a', '')]),
28 | ("a=", [('a', '')]),
29 | ("&a=b", [('a', 'b')]),
30 | ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
31 | ("a=1&a=2", [('a', '1'), ('a', '2')]),
32 | ]
33 |
34 | parse_qs_test_cases = [
35 | ("", {}),
36 | ("&", {}),
37 | ("&&", {}),
38 | ("=", {'': ['']}),
39 | ("=a", {'': ['a']}),
40 | ("a", {'a': ['']}),
41 | ("a=", {'a': ['']}),
42 | ("&a=b", {'a': ['b']}),
43 | ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
44 | ("a=1&a=2", {'a': ['1', '2']}),
45 | ]
46 |
47 |
48 | class UrlParseTestCase(unittest.TestCase):
49 | def checkRoundtrips(self, url, parsed, split):
50 | result = urlparse.urlparse(url)
51 | self.assertEqual(result, parsed)
52 | t = (result.scheme, result.netloc, result.path,
53 | result.params, result.query, result.fragment)
54 | self.assertEqual(t, parsed)
55 | # put it back together and it should be the same
56 | result2 = urlparse.urlunparse(result)
57 | self.assertEqual(result2, url)
58 | self.assertEqual(result2, result.geturl())
59 |
60 | # the result of geturl() is a fixpoint; we can always parse it
61 | # again to get the same result:
62 | result3 = urlparse.urlparse(result.geturl())
63 | self.assertEqual(result3.geturl(), result.geturl())
64 | self.assertEqual(result3, result)
65 | self.assertEqual(result3.scheme, result.scheme)
66 | self.assertEqual(result3.netloc, result.netloc)
67 | self.assertEqual(result3.path, result.path)
68 | self.assertEqual(result3.params, result.params)
69 | self.assertEqual(result3.query, result.query)
70 | self.assertEqual(result3.fragment, result.fragment)
71 | self.assertEqual(result3.username, result.username)
72 | self.assertEqual(result3.password, result.password)
73 | self.assertEqual(result3.hostname, result.hostname)
74 | self.assertEqual(result3.port, result.port)
75 |
76 | # check the roundtrip using urlsplit() as well
77 | result = urlparse.urlsplit(url)
78 | self.assertEqual(result, split)
79 | t = (result.scheme, result.netloc, result.path,
80 | result.query, result.fragment)
81 | self.assertEqual(t, split)
82 | result2 = urlparse.urlunsplit(result)
83 | self.assertEqual(result2, url)
84 | self.assertEqual(result2, result.geturl())
85 |
86 | # check the fixpoint property of re-parsing the result of geturl()
87 | result3 = urlparse.urlsplit(result.geturl())
88 | self.assertEqual(result3.geturl(), result.geturl())
89 | self.assertEqual(result3, result)
90 | self.assertEqual(result3.scheme, result.scheme)
91 | self.assertEqual(result3.netloc, result.netloc)
92 | self.assertEqual(result3.path, result.path)
93 | self.assertEqual(result3.query, result.query)
94 | self.assertEqual(result3.fragment, result.fragment)
95 | self.assertEqual(result3.username, result.username)
96 | self.assertEqual(result3.password, result.password)
97 | self.assertEqual(result3.hostname, result.hostname)
98 | self.assertEqual(result3.port, result.port)
99 |
100 | def test_qsl(self):
101 | for orig, expect in parse_qsl_test_cases:
102 | result = urlparse.parse_qsl(orig, keep_blank_values=True)
103 | self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
104 |
105 | def test_qs(self):
106 | for orig, expect in parse_qs_test_cases:
107 | result = urlparse.parse_qs(orig, keep_blank_values=True)
108 | self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
109 |
110 | def test_roundtrips(self):
111 | testcases = [
112 | ('file:///tmp/junk.txt',
113 | ('file', '', '/tmp/junk.txt', '', '', ''),
114 | ('file', '', '/tmp/junk.txt', '', '')),
115 | ('imap://mail.python.org/mbox1',
116 | ('imap', 'mail.python.org', '/mbox1', '', '', ''),
117 | ('imap', 'mail.python.org', '/mbox1', '', '')),
118 | ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
119 | ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
120 | '', '', ''),
121 | ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
122 | '', '')),
123 | ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
124 | ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
125 | '', '', ''),
126 | ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
127 | '', '')),
128 | ('git+ssh://git@github.com/user/project.git',
129 | ('git+ssh', 'git@github.com', '/user/project.git',
130 | '', '', ''),
131 | ('git+ssh', 'git@github.com', '/user/project.git',
132 | '', ''))
133 | ]
134 | for url, parsed, split in testcases:
135 | self.checkRoundtrips(url, parsed, split)
136 |
137 | def test_http_roundtrips(self):
138 | # urlparse.urlsplit treats 'http:' as an optimized special case,
139 | # so we test both 'http:' and 'https:' in all the following.
140 | # Three cheers for white box knowledge!
141 | testcases = [
142 | ('://www.python.org',
143 | ('www.python.org', '', '', '', ''),
144 | ('www.python.org', '', '', '')),
145 | ('://www.python.org#abc',
146 | ('www.python.org', '', '', '', 'abc'),
147 | ('www.python.org', '', '', 'abc')),
148 | ('://www.python.org?q=abc',
149 | ('www.python.org', '', '', 'q=abc', ''),
150 | ('www.python.org', '', 'q=abc', '')),
151 | ('://www.python.org/#abc',
152 | ('www.python.org', '/', '', '', 'abc'),
153 | ('www.python.org', '/', '', 'abc')),
154 | ('://a/b/c/d;p?q#f',
155 | ('a', '/b/c/d', 'p', 'q', 'f'),
156 | ('a', '/b/c/d;p', 'q', 'f')),
157 | ]
158 | for scheme in ('http', 'https'):
159 | for url, parsed, split in testcases:
160 | url = scheme + url
161 | parsed = (scheme,) + parsed
162 | split = (scheme,) + split
163 | self.checkRoundtrips(url, parsed, split)
164 |
165 | def checkJoin(self, base, relurl, expected):
166 | self.assertEqual(urlparse.urljoin(base, relurl), expected,
167 | (base, relurl, expected))
168 |
169 | def test_unparse_parse(self):
170 | for u in ['Python', './Python', 'x-newscheme://foo.com/stuff', 'x://y', 'x:/y', 'x:/', '/', ]:
171 | self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
172 | self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
173 |
174 | def test_RFC1808(self):
175 | # "normal" cases from RFC 1808:
176 | self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
177 | self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
178 | self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
179 | self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
180 | self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
181 | self.checkJoin(RFC1808_BASE, '//g', 'http://g')
182 | self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
183 | self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
184 | self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
185 | self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
186 | self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
187 | self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
188 | self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
189 | self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
190 | self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
191 | self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
192 | self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
193 | self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
194 | self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
195 | self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
196 | self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
197 | self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
198 |
199 | # "abnormal" cases from RFC 1808:
200 | self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
201 | self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
202 | self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
203 | self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
204 | self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
205 | self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
206 | self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
207 | self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
208 | self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
209 | self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
210 | self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
211 | self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
212 | self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
213 |
214 | # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
215 | # so we'll not actually run these tests (which expect 1808 behavior).
216 | # self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
217 | # self.checkJoin(RFC1808_BASE, 'http:', 'http:')
218 |
219 | def test_RFC2396(self):
220 | # cases from RFC 2396
221 |
222 | self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
223 | self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
224 | self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
225 | self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
226 | self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
227 | self.checkJoin(RFC2396_BASE, '//g', 'http://g')
228 | self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
229 | self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
230 | self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
231 | self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
232 | self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
233 | self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
234 | self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
235 | self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
236 | self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
237 | self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
238 | self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
239 | self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
240 | self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
241 | self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
242 | self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
243 | self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
244 | self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
245 | self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
246 | self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
247 | self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
248 | self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
249 | self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
250 | self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
251 | self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
252 | self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
253 | self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
254 | self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
255 | self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
256 | self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
257 | self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
258 | self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
259 | self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
260 | self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
261 |
262 | def test_RFC3986(self):
263 | # Test cases from RFC3986
264 | self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
265 | self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
266 | self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
267 | self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
268 | self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
269 | self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
270 | self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
271 | self.checkJoin(RFC3986_BASE, '//g', 'http://g')
272 | self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
273 | self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
274 | self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
275 | self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
276 | self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
277 | self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
278 | self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
279 | self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
280 | self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
281 | self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
282 | self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
283 | self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
284 | self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
285 | self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
286 | self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
287 | self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
288 | self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')
289 |
290 | # Abnormal Examples
291 |
292 | # The 'abnormal scenarios' are incompatible with RFC2986 parsing
293 | # Tests are here for reference.
294 |
295 | # self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
296 | # self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
297 | # self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
298 | # self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
299 |
300 | self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
301 | self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
302 | self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
303 | self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
304 | self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
305 | self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
306 | self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
307 | self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
308 | self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
309 | self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
310 | self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
311 | self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
312 | self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
313 | self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
314 | # self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
315 | self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g') # relaxed parser
316 |
317 | def test_urljoins(self):
318 | self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h')
319 | self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
320 | self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
321 | self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g')
322 | self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g')
323 | self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/')
324 | self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g')
325 | self.checkJoin(SIMPLE_BASE, '//g', 'http://g')
326 | self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y')
327 | self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y')
328 | self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
329 | self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/')
330 | self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/')
331 | self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/')
332 | self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/')
333 | self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g')
334 | self.checkJoin(SIMPLE_BASE, '../..', 'http://a/')
335 | self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g')
336 | self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g')
337 | self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g')
338 | self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/')
339 | self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g')
340 | self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h')
341 | self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h')
342 | self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
343 | self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
344 | self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y')
345 | self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y')
346 | self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x')
347 |
348 | def test_urldefrag(self):
349 | for url, defrag, frag in [
350 | ('http://python.org#frag', 'http://python.org', 'frag'),
351 | ('http://python.org', 'http://python.org', ''),
352 | ('http://python.org/#frag', 'http://python.org/', 'frag'),
353 | ('http://python.org/', 'http://python.org/', ''),
354 | ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
355 | ('http://python.org/?q', 'http://python.org/?q', ''),
356 | ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
357 | ('http://python.org/p?q', 'http://python.org/p?q', ''),
358 | (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
359 | (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
360 | ]:
361 | self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
362 |
363 | def test_urlsplit_attributes(self):
364 | url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
365 | p = urlparse.urlsplit(url)
366 | self.assertEqual(p.scheme, "http")
367 | self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
368 | self.assertEqual(p.path, "/doc/")
369 | self.assertEqual(p.query, "")
370 | self.assertEqual(p.fragment, "frag")
371 | self.assertEqual(p.username, None)
372 | self.assertEqual(p.password, None)
373 | self.assertEqual(p.hostname, "WWW.PYTHON.ORG")
374 | self.assertEqual(p.port, None)
375 | # geturl() won't return exactly the original URL in this case
376 | # since the scheme is always case-normalized
377 | # self.assertEqual(p.geturl(), url)
378 |
379 | url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
380 | p = urlparse.urlsplit(url)
381 | self.assertEqual(p.scheme, "http")
382 | self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
383 | self.assertEqual(p.path, "/doc/")
384 | self.assertEqual(p.query, "query=yes")
385 | self.assertEqual(p.fragment, "frag")
386 | self.assertEqual(p.username, "User")
387 | self.assertEqual(p.password, "Pass")
388 | self.assertEqual(p.hostname, "www.python.org")
389 | self.assertEqual(p.port, 80)
390 | self.assertEqual(p.geturl(), url)
391 |
392 | # Addressing issue1698, which suggests Username can contain
393 | # "@" characters. Though not RFC compliant, many ftp sites allow
394 | # and request email addresses as usernames.
395 |
396 | url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
397 | p = urlparse.urlsplit(url)
398 | self.assertEqual(p.scheme, "http")
399 | self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
400 | self.assertEqual(p.path, "/doc/")
401 | self.assertEqual(p.query, "query=yes")
402 | self.assertEqual(p.fragment, "frag")
403 | self.assertEqual(p.username, "User@example.com")
404 | self.assertEqual(p.password, "Pass")
405 | self.assertEqual(p.hostname, "www.python.org")
406 | self.assertEqual(p.port, 80)
407 | self.assertEqual(p.geturl(), url)
408 |
409 | def test_attributes_bad_port(self):
410 | """Check handling of non-integer ports."""
411 | p = urlparse.urlsplit("http://www.example.net:foo")
412 | self.assertEqual(p.netloc, "www.example.net:foo")
413 | self.assertRaises(ValueError, lambda: p.port)
414 |
415 | p = urlparse.urlparse("http://www.example.net:foo")
416 | self.assertEqual(p.netloc, "www.example.net:foo")
417 | self.assertRaises(ValueError, lambda: p.port)
418 |
419 | def test_attributes_without_netloc(self):
420 | # This example is straight from RFC 3261. It looks like it
421 | # should allow the username, hostname, and port to be filled
422 | # in, but doesn't. Since it's a URI and doesn't use the
423 | # scheme://netloc syntax, the netloc and related attributes
424 | # should be left empty.
425 | uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
426 | p = urlparse.urlsplit(uri)
427 | self.assertEqual(p.netloc, None)
428 | self.assertEqual(p.username, None)
429 | self.assertEqual(p.password, None)
430 | self.assertEqual(p.hostname, None)
431 | self.assertEqual(p.port, None)
432 | self.assertEqual(p.geturl(), uri)
433 |
434 | p = urlparse.urlparse(uri)
435 | self.assertEqual(p.netloc, None)
436 | self.assertEqual(p.username, None)
437 | self.assertEqual(p.password, None)
438 | self.assertEqual(p.hostname, None)
439 | self.assertEqual(p.port, None)
440 | self.assertEqual(p.geturl(), uri)
441 |
442 | def test_caching(self):
443 | # Test case for bug #1313119
444 | uri = "http://example.com/doc/"
445 | unicode_uri = str(uri)
446 |
447 | urlparse.urlparse(unicode_uri)
448 | p = urlparse.urlparse(uri)
449 | self.assertEqual(type(p.scheme), type(uri))
450 | self.assertEqual(type(p.hostname), type(uri))
451 | self.assertEqual(type(p.path), type(uri))
452 |
453 | def test_noslash(self):
454 | # Issue 1637: http://foo.com?query is legal
455 | self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
456 | ('http', 'example.com', '', '', 'blahblah=/foo', ''))
457 |
458 | def test_anyscheme(self):
459 | # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
460 | self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
461 | ('s3', 'foo.com', '/stuff', '', '', ''))
462 | self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
463 | ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
464 |
465 | def test_split_relative_urls(self):
466 | self.assertEqual(urlparse.urlparse("x-newscheme:stuff"),
467 | ('x-newscheme', None, 'stuff', '', '', ''))
468 | self.assertEqual(urlparse.urlparse("x-newscheme:/stuff"),
469 | ('x-newscheme', None, '/stuff', '', '', ''))
470 | self.assertEqual(urlparse.urlparse("x-newscheme://stuff"),
471 | ('x-newscheme', 'stuff', '', '', '', ''))
472 | self.assertEqual(urlparse.urlparse("x-newscheme:///stuff"),
473 | ('x-newscheme', '', '/stuff', '', '', ''))
474 |
475 | def test_unsplit_relative_urls(self):
476 | self.assertEqual(urlparse.urlunparse(('x-newscheme', None, 'stuff', '', '', '')),
477 | "x-newscheme:stuff")
478 | self.assertEqual(urlparse.urlunparse(('x-newscheme', None, '/stuff', '', '', '')),
479 | "x-newscheme:/stuff")
480 | self.assertEqual(urlparse.urlunparse(('x-newscheme', 'stuff', '', '', '', '')),
481 | "x-newscheme://stuff")
482 | self.assertEqual(urlparse.urlunparse(('x-newscheme', '', '/stuff', '', '', '')),
483 | "x-newscheme:///stuff")
484 |
485 | # vim:et:sts=4:ts=4
486 |
--------------------------------------------------------------------------------
/tests/doc_test.py:
--------------------------------------------------------------------------------
def test_docs():
    """Run the README's doctest examples and require that all of them pass."""
    from doctest import testfile
    failure_count, _attempted = testfile(
        'README.md', module_relative=False, encoding='UTF-8',
    )
    assert failure_count == 0
5 |
--------------------------------------------------------------------------------
/tests/encoding_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import yelp_uri.encoding as E
4 | from yelp_uri.urllib_utf8 import quote
5 |
6 |
def test_uri_error():
    """MalformedUrlError must subclass UnicodeError: exception handlers around recode catch UnicodeError."""
    mro = type.mro(E.MalformedUrlError)
    assert issubclass(E.MalformedUrlError, UnicodeError), mro
10 |
11 |
def test_bad_port():
    """A non-numeric port must raise MalformedUrlError with a descriptive message.

    Fixed: the old bare try/except passed silently if encode_uri raised
    nothing at all; pytest.raises makes the exception mandatory.
    """
    with pytest.raises(E.MalformedUrlError) as excinfo:
        E.encode_uri('http://foo.bar:buz')
    assert excinfo.value.args == ("Invalid port number: invalid literal for int() with base 10: 'buz'",)
17 |
18 |
def test_bad_domain_segment_too_long():
    """A DNS label longer than 63 characters must raise MalformedUrlError (IDNA limit).

    Fixed: the old bare try/except passed silently if encode_uri raised
    nothing at all; pytest.raises makes the exception mandatory.
    """
    with pytest.raises(E.MalformedUrlError) as excinfo:
        E.encode_uri('http://foo.%s.bar' % ('x' * 64))

    error_msg = (
        "Invalid hostname: encoding with 'IDNA' codec failed "
        "(UnicodeError: label empty or too long): "
    )
    assert excinfo.value.args == (
        error_msg +
        repr("foo.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.bar"),
    )
32 |
33 |
def test_bad_domain_extra_dots():
    # We normalize this one ala Chrome browser: the empty labels and
    # leading/trailing dots in the hostname are dropped, while the
    # dotted path segment is left alone.
    normalized = E.encode_uri('http://..foo..com../.bar.')
    assert normalized == 'http://foo.com/.bar.'
37 |
38 |
def test_recode_none_raises_attribute_error():
    """recode_uri requires a string-like argument; None blows up with AttributeError."""
    pytest.raises(AttributeError, E.recode_uri, None)
42 |
43 |
def test_unicode_url_gets_quoted():
    """A non-ASCII path character comes back percent-encoded as UTF-8."""
    recoded = E.recode_uri('http://www.yelp.com/münchen')
    assert recoded == 'http://www.yelp.com/m%C3%BCnchen'
47 |
48 |
def test_mixed_quoting_url():
    """Test that a url with mixed quoting has uniform quoting after requoting"""
    before = 'http://www.yelp.com/m%C3%BCnchen/münchen'
    after = 'http://www.yelp.com/m%C3%BCnchen/m%C3%BCnchen'
    assert E.recode_uri(before) == after
53 |
54 |
def test_mixed_quoting_param():
    """Tests that a url with mixed quoting in the parameters has uniform quoting after requoting"""
    before = 'http://www.yelp.com?m%C3%BCnchen=münchen'
    after = 'http://www.yelp.com?m%C3%BCnchen=m%C3%BCnchen'
    assert E.recode_uri(before) == after
59 |
60 |
def test_mixed_encoding():
    """Tests that a url with mixed encoding has uniform encoding after recoding"""
    # %FC and %C3%BC both spell ü; the recoded url uses UTF-8 throughout.
    before = 'http://www.yelp.com/m%C3%BCnchen?m%FCnchen'
    after = 'http://www.yelp.com/m%C3%BCnchen?m%C3%BCnchen'
    assert E.recode_uri(before) == after
65 |
66 |
def test_mixed_quoting_multiple_queries():
    """Tests that a url with mixed quoting in multiple parameters has uniform quoting after requoting"""
    before = 'http://yelp.com/münchen/m%C3%BCnchen?münchen=m%C3%BCnchen&htmlchars=<">'
    after = 'http://yelp.com/m%C3%BCnchen/m%C3%BCnchen?m%C3%BCnchen=m%C3%BCnchen&htmlchars=%3C%22%3E'
    assert E.recode_uri(before) == after
72 |
73 |
def test_utf8_url():
    """Tests that a UTF-8 bytestring url recodes to the same result as its text equivalent."""
    before = 'http://yelp.com/münchen/m%C3%BCnchen?münchen=m%C3%BCnchen&htmlchars=<">'.encode()
    after = 'http://yelp.com/m%C3%BCnchen/m%C3%BCnchen?m%C3%BCnchen=m%C3%BCnchen&htmlchars=%3C%22%3E'
    assert E.recode_uri(before) == after
79 |
80 |
def test_multiple_escapes():
    # Exactly one layer of encoding is applied: the hostname is IDNA-encoded,
    # the raw ü is percent-quoted once, and already-escaped '%25' sequences
    # are preserved verbatim (no double decode/encode).
    before = 'http://münch.com?zero=münch&one=m%C3%BCnch&two=m%25C3%25BCnch&three=m%2525C3%2525BCnch'
    after = 'http://xn--mnch-0ra.com?zero=m%C3%BCnch&one=m%C3%BCnch&two=m%25C3%25BCnch&three=m%2525C3%2525BCnch'
    assert E.recode_uri(before) == after
85 |
86 |
def test_url_reserved_chars():
    """Already-quoted reserved characters survive recoding unchanged (idempotence)."""
    url = 'http://www.yelp.com?chars=' + quote(':/?&=')
    assert E.recode_uri(url) == url
90 |
91 |
def test_multi_params_for_individual_path_segment():
    # Nothing (overly) strange in this url: nothing should be escaped
    url = '/foo;bar;baz/barney;fred;wilma'
    recoded = E.recode_uri(url)
    assert recoded == url
96 |
97 |
def test_url_with_params():
    # A doubleclick-style click-through url: the ';'-delimited params and the
    # already-quoted nested url must pass through recoding untouched.
    url = (
        'http://ad.doubleclick.net/clk;217976351;41128009;f?'
        'http%3A//www.24hourfitness.com/FindClubDetail.do?'
        'clubid=189&edit=null&semiPromoCode=null&cm_mmc='
        'Yelp-_-ClubPage-_-BusinessListing-_-Link'
    )
    recoded = E.recode_uri(url)
    assert recoded == url
106 |
107 |
def test_url_with_hashbang():
    # For a discussion of url hashbangs, see: http://www.jenitennison.com/blog/node/154
    url = 'https://twitter.com/#!/YelpCincy/statuses/179565284020060161'
    recoded = E.recode_uri(url)
    assert recoded == url
112 |
113 |
def test_url_with_colon():
    # Ticket: 31242 -- a ':' inside the fragment must not be percent-encoded.
    url = 'http://www.yelp.fr/biz/smalls-marseille#hrid:u_UQvMf97E8pD4HEb59uIw'
    recoded = E.recode_uri(url)
    assert recoded == url
118 |
119 |
120 | def test_param_xss():
121 | assert E.recode_uri('/foo;