├── .coveragerc ├── .gitignore ├── .pre-commit-config.yaml ├── .travis.yml ├── Makefile ├── README.md ├── UNLICENSE ├── bin └── generate-tlds ├── pylintrc ├── requirements.txt ├── requirements_dev.txt ├── setup.py ├── tests ├── __init__.py ├── _urlparse_less_special_test.py ├── doc_test.py ├── encoding_test.py ├── search_test.py └── urllib_utf8_test.py ├── tox.ini └── yelp_uri ├── __init__.py ├── _urlparse_less_special.py ├── encoding.py ├── search.py ├── tlds ├── __init__.py ├── all.py └── common.py └── urllib_utf8.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | timid = True 4 | source = . 5 | omit = 6 | .tox/* 7 | /usr/* 8 | setup.py 9 | venv/* 10 | 11 | [report] 12 | exclude_lines = 13 | # Have to re-enable the standard pragma 14 | \#\s*pragma: no cover 15 | 16 | # Don't complain if tests don't hit defensive assertion code: 17 | ^\s*raise AssertionError\b 18 | ^\s*raise NotImplementedError\b 19 | ^\s*return NotImplemented\b 20 | ^\s*raise$ 21 | 22 | # Don't complain if non-runnable code isn't run: 23 | ^if __name__ == ['"]__main__['"]:$ 24 | 25 | [html] 26 | directory = coverage-html 27 | 28 | # vim:ft=dosini 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.iml 3 | *.py[co] 4 | .*.sw[a-z] 5 | .coverage 6 | .idea 7 | .pre-commit-files 8 | .project 9 | .pydevproject 10 | .tox 11 | .venv.touch 12 | /venv* 13 | coverage-html 14 | dist 15 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: git@github.com:pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | files: \.(py|sh|yaml)$ 7 | - id: end-of-file-fixer 8 | files: \.(py|sh|yaml)$ 9 | - id: check-yaml 10 | files: \.(yaml|yml)$ 11 | - id: debug-statements 12 | files: \.py$ 13 | - id: name-tests-test 14 | files: tests/.+\.py$ 15 | - id: fix-encoding-pragma 16 | args: 17 | - --remove 18 | language_version: python3.8 19 | - repo: http://github.com/asottile/reorder_python_imports 20 | rev: v3.10.0 21 | hooks: 22 | - id: reorder-python-imports 23 | - repo: http://github.com/asottile/pyupgrade 24 | rev: v3.10.1 25 | hooks: 26 | - id: pyupgrade 27 | args: ['--py38-plus'] 28 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - env: TOXENV=py27 5 | - env: TOXENV=py36 6 | python: 3.6 7 | install: pip install tox 8 | script: tox 9 | cache: 10 | directories: 11 | - $HOME/.cache/pip 12 | - $HOME/.cache/pre-commit 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export PATH := $(PWD)/bin:$(PWD)/venv/bin:$(PATH) 2 | 3 | REBUILD_FLAG = 4 | 5 | .PHONY: all 6 | all: venv test 7 | 8 | venv: .venv.touch 9 | rm -rf venv 10 | virtualenv venv --python python3.8 11 | pip install -r requirements_dev.txt 12 | 13 | .PHONY: tests test 14 | tests: test 15 | test: venv 16 | tox $(REBUILD_FLAG) 17 | 18 | 19 | .venv.touch: setup.py requirements.txt requirements_dev.txt 20 | $(eval REBUILD_FLAG := --recreate) 21 | touch .venv.touch 22 | 23 | 24 | 
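# .venv.touch is a timestamp sentinel: the rule above re-touches it whenever setup.py or the
# requirements files change, which also sets REBUILD_FLAG to --recreate so the next tox run
# rebuilds its virtualenvs against the updated dependency lists.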
.PHONY: clean 25 | clean: 26 | find . -iname '*.pyc' | xargs rm -f 27 | rm -rf .tox 28 | rm -rf ./venv 29 | rm -f .venv.touch 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yelp\_uri 2 | 3 | [![Build Status](https://travis-ci.org/Yelp/yelp_uri.svg)](https://travis-ci.org/Yelp/yelp\_uri) 4 | 5 | 6 | ## Installation 7 | 8 | For a primer on pip and virtualenv, see the [Python Packaging User Guide](https://python-packaging-user-guide.readthedocs.org/en/latest/tutorial.html). 9 | 10 | TL;DR: `pip install yelp_uri` 11 | 12 | 13 | ## Usage 14 | 15 | Make a well-encoded URI from user input. 16 | 17 | ```python 18 | >>> weird_uri = 'http://münch.com/münch?one=m%C3%BCnch#m%FCnch' 19 | 20 | >>> import yelp_uri.encoding as E 21 | >>> well_encoded = E.recode_uri(weird_uri) 22 | >>> print(well_encoded) 23 | http://xn--mnch-0ra.com/m%C3%BCnch?one=m%C3%BCnch#m%C3%BCnch 24 | 25 | ``` 26 | 27 | Make a user-readable url, from either a well-encoded url or user input: 28 | 29 | ```python 30 | >>> print(E.decode_uri(well_encoded)) 31 | http://münch.com/münch?one=münch#münch 32 | >>> print(E.decode_uri(weird_uri)) 33 | http://münch.com/münch?one=münch#münch 34 | 35 | ``` 36 | 37 | 38 | 39 | `yelp_uri.search` has regexes for finding URLs in user-generated plaintext. 40 | 41 | ```python 42 | >>> plaintext = ''' 43 | ... Reference: http://en.wikipedia.org/wiki/Eon_(geology) 44 | ... Follow @YelpCincy on Twitter (http://twitter.com/YelpCincy) 45 | ... ''' 46 | >>> from yelp_uri.search import url_regex 47 | >>> for url in url_regex.finditer(plaintext): print(url.group()) 48 | http://en.wikipedia.org/wiki/Eon_(geology) 49 | http://twitter.com/YelpCincy 50 | 51 | ``` 52 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /bin/generate-tlds: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This reads the IANA-maintained list of tlds and formats/outputs them for use 4 | in the domains regular expression. To regenerate: 5 | ./bin/generate-tlds > yelp_uri/tlds/all.py 6 | """ 7 | import sys 8 | 9 | import urllib2 10 | 11 | 12 | def main(url='http://data.iana.org/TLD/tlds-alpha-by-domain.txt'): 13 | try: 14 | domain_data = urllib2.urlopen(url) 15 | except urllib2.URLError as e: 16 | print( 17 | "Could not get the domains from the given URL. Perhaps the IANA" 18 | "has changed the location of the file or it no longer exists." 19 | ) 20 | return e.reason 21 | 22 | # Convert all newlines except the last one to '|', so 'foo\nbar\n' -> 'foo|bar'. 23 | # Ignores all lines starting with '#', which is a comment in the text file. 24 | data = ( 25 | line.lower() 26 | for line in domain_data.read().splitlines() 27 | if not line.startswith("#") and line.strip() 28 | ) 29 | 30 | tlds = set() 31 | for datum in data: 32 | # get both the punycoded and unicoded versions: 33 | tlds.add(datum.decode('utf-8')) 34 | tlds.add(datum.decode('idna')) 35 | 36 | domains_string = "',\n '".join(sorted(tlds)) 37 | 38 | print('''\ 39 | # -*- coding: utf-8 -*- 40 | from __future__ import unicode_literals 41 | # Generated automatically. To regenerate: 42 | # ./bin/generate-tlds > yelp_uri/tlds/all.py 43 | all_tlds = '|'.join(( 44 | '{}', 45 | ))'''.format(domains_string).encode('UTF-8')) 46 | 47 | 48 | if __name__ == "__main__": 49 | sys.exit(main()) 50 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | disable= 3 | locally-disabled, 4 | missing-docstring, 5 | maybe-no-member, 6 | redefined-variable-type, 7 | redundant-keyword-arg, 8 | too-many-function-args, 9 | 10 | 11 | [REPORTS] 12 | output-format=colorized 13 | reports=no 14 | 15 | [BASIC] 16 | #const-rgx=(([A-Za-z_][A-Za-z0-9_]*)|(__.*__))$ 17 | const-rgx=(([A-Za-z_][A-Za-z0-9_]*)|(__.*__))$ 18 | 19 | #function-rgx=[a-z_][a-z0-9_]{2,30}$ 20 | function-rgx=[a-z_][a-z0-9_]{2,60}$ 21 | 22 | #method-rgx=[a-z_][a-z0-9_]{2,30}$ 23 | method-rgx=(%(function-rgx)s|%(const-rgx)s) 24 | 25 | #variable-rgx=[a-z_][a-z0-9_]{2,30}$ 26 | variable-rgx=[a-z_][a-z0-9_]{0,30}$ 27 | 28 | [FORMAT] 29 | max-line-length=131 30 | 31 | [TYPECHECK] 32 | ignored-classes= 33 | pytest, 34 | RFC3986, 35 | _MovedItems, 36 | 37 | [DESIGN] 38 | min-public-methods=0 39 | 40 | # vim:ft=dosini: 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | . 
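# '.' installs yelp_uri itself from this checkout, so environments built from this file exercise the local source.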
2 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | coverage 2 | flake8 3 | mock 4 | pytest 5 | pre-commit 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | 5 | def main(): 6 | setup( 7 | name='yelp_uri', 8 | version='2.0.1', 9 | description="Uri utilities maintained by Yelp", 10 | url='https://github.com/Yelp/yelp_uri', 11 | author='Buck Golemon', 12 | author_email='buck@yelp.com', 13 | platforms='all', 14 | classifiers=[ 15 | 'License :: Public Domain', 16 | 'Programming Language :: Python :: 3.8', 17 | ], 18 | packages=find_packages(exclude=('tests*',)), 19 | install_requires=[ 20 | 'yelp_encodings', 21 | 'yelp_bytes' 22 | ], 23 | options={ 24 | 'bdist_wheel': { 25 | 'universal': 1, 26 | } 27 | }, 28 | ) 29 | 30 | 31 | if __name__ == '__main__': 32 | exit(main()) 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/yelp_uri/8688042e6579bc235e8b2ddd2b552c5be84ba674/tests/__init__.py -------------------------------------------------------------------------------- /tests/_urlparse_less_special_test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """Tests are also pulled from stdlib 2.6 3 | This file is space-indented to ease merging from upstream. 4 | 5 | http://hg.python.org/cpython/raw-file/4a17784f2fee/Lib/test/test_urlparse.py 6 | """ 7 | import unittest 8 | 9 | import yelp_uri._urlparse_less_special as urlparse 10 | 11 | 12 | RFC1808_BASE = "http://a/b/c/d;p?q#f" 13 | RFC2396_BASE = "http://a/b/c/d;p?q" 14 | RFC3986_BASE = 'http://a/b/c/d;p?q' 15 | SIMPLE_BASE = 'http://a/b/c/d' 16 | 17 | # A list of test cases. Each test case is a two-tuple that contains 18 | # a string with the query and the expected result (a list for parse_qsl, a dict for parse_qs).
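# Note: the expected values below assume keep_blank_values=True (which is how test_qsl and
# test_qs invoke the parsers); that is why inputs like '=' and 'a=' map to empty-string
# values instead of being dropped.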
19 | 20 | parse_qsl_test_cases = [ 21 | ("", []), 22 | ("&", []), 23 | ("&&", []), 24 | ("=", [('', '')]), 25 | ("=a", [('', 'a')]), 26 | ("a", [('a', '')]), 27 | ("a=", [('a', '')]), 28 | ("a=", [('a', '')]), 29 | ("&a=b", [('a', 'b')]), 30 | ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), 31 | ("a=1&a=2", [('a', '1'), ('a', '2')]), 32 | ] 33 | 34 | parse_qs_test_cases = [ 35 | ("", {}), 36 | ("&", {}), 37 | ("&&", {}), 38 | ("=", {'': ['']}), 39 | ("=a", {'': ['a']}), 40 | ("a", {'a': ['']}), 41 | ("a=", {'a': ['']}), 42 | ("&a=b", {'a': ['b']}), 43 | ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), 44 | ("a=1&a=2", {'a': ['1', '2']}), 45 | ] 46 | 47 | 48 | class UrlParseTestCase(unittest.TestCase): 49 | def checkRoundtrips(self, url, parsed, split): 50 | result = urlparse.urlparse(url) 51 | self.assertEqual(result, parsed) 52 | t = (result.scheme, result.netloc, result.path, 53 | result.params, result.query, result.fragment) 54 | self.assertEqual(t, parsed) 55 | # put it back together and it should be the same 56 | result2 = urlparse.urlunparse(result) 57 | self.assertEqual(result2, url) 58 | self.assertEqual(result2, result.geturl()) 59 | 60 | # the result of geturl() is a fixpoint; we can always parse it 61 | # again to get the same result: 62 | result3 = urlparse.urlparse(result.geturl()) 63 | self.assertEqual(result3.geturl(), result.geturl()) 64 | self.assertEqual(result3, result) 65 | self.assertEqual(result3.scheme, result.scheme) 66 | self.assertEqual(result3.netloc, result.netloc) 67 | self.assertEqual(result3.path, result.path) 68 | self.assertEqual(result3.params, result.params) 69 | self.assertEqual(result3.query, result.query) 70 | self.assertEqual(result3.fragment, result.fragment) 71 | self.assertEqual(result3.username, result.username) 72 | self.assertEqual(result3.password, result.password) 73 | self.assertEqual(result3.hostname, result.hostname) 74 | self.assertEqual(result3.port, result.port) 75 | 76 | # check the roundtrip using urlsplit() as well 77 | result = urlparse.urlsplit(url) 78 | self.assertEqual(result, split) 79 | t = (result.scheme, result.netloc, result.path, 80 | result.query, result.fragment) 81 | self.assertEqual(t, split) 82 | result2 = urlparse.urlunsplit(result) 83 | self.assertEqual(result2, url) 84 | self.assertEqual(result2, result.geturl()) 85 | 86 | # check the fixpoint property of re-parsing the result of geturl() 87 | result3 = urlparse.urlsplit(result.geturl()) 88 | self.assertEqual(result3.geturl(), result.geturl()) 89 | self.assertEqual(result3, result) 90 | self.assertEqual(result3.scheme, result.scheme) 91 | self.assertEqual(result3.netloc, result.netloc) 92 | self.assertEqual(result3.path, result.path) 93 | self.assertEqual(result3.query, result.query) 94 | self.assertEqual(result3.fragment, result.fragment) 95 | self.assertEqual(result3.username, result.username) 96 | self.assertEqual(result3.password, result.password) 97 | self.assertEqual(result3.hostname, result.hostname) 98 | self.assertEqual(result3.port, result.port) 99 | 100 | def test_qsl(self): 101 | for orig, expect in parse_qsl_test_cases: 102 | result = urlparse.parse_qsl(orig, keep_blank_values=True) 103 | self.assertEqual(result, expect, "Error parsing %s" % repr(orig)) 104 | 105 | def test_qs(self): 106 | for orig, expect in parse_qs_test_cases: 107 | result = urlparse.parse_qs(orig, keep_blank_values=True) 108 | self.assertEqual(result, expect, "Error parsing %s" % repr(orig)) 109 | 110 | def test_roundtrips(self): 111 | testcases = [ 112 | ('file:///tmp/junk.txt', 113 | 
('file', '', '/tmp/junk.txt', '', '', ''), 114 | ('file', '', '/tmp/junk.txt', '', '')), 115 | ('imap://mail.python.org/mbox1', 116 | ('imap', 'mail.python.org', '/mbox1', '', '', ''), 117 | ('imap', 'mail.python.org', '/mbox1', '', '')), 118 | ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', 119 | ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', 120 | '', '', ''), 121 | ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', 122 | '', '')), 123 | ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', 124 | ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', 125 | '', '', ''), 126 | ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', 127 | '', '')), 128 | ('git+ssh://git@github.com/user/project.git', 129 | ('git+ssh', 'git@github.com', '/user/project.git', 130 | '', '', ''), 131 | ('git+ssh', 'git@github.com', '/user/project.git', 132 | '', '')) 133 | ] 134 | for url, parsed, split in testcases: 135 | self.checkRoundtrips(url, parsed, split) 136 | 137 | def test_http_roundtrips(self): 138 | # urlparse.urlsplit treats 'http:' as an optimized special case, 139 | # so we test both 'http:' and 'https:' in all the following. 140 | # Three cheers for white box knowledge! 141 | testcases = [ 142 | ('://www.python.org', 143 | ('www.python.org', '', '', '', ''), 144 | ('www.python.org', '', '', '')), 145 | ('://www.python.org#abc', 146 | ('www.python.org', '', '', '', 'abc'), 147 | ('www.python.org', '', '', 'abc')), 148 | ('://www.python.org?q=abc', 149 | ('www.python.org', '', '', 'q=abc', ''), 150 | ('www.python.org', '', 'q=abc', '')), 151 | ('://www.python.org/#abc', 152 | ('www.python.org', '/', '', '', 'abc'), 153 | ('www.python.org', '/', '', 'abc')), 154 | ('://a/b/c/d;p?q#f', 155 | ('a', '/b/c/d', 'p', 'q', 'f'), 156 | ('a', '/b/c/d;p', 'q', 'f')), 157 | ] 158 | for scheme in ('http', 'https'): 159 | for url, parsed, split in testcases: 160 | url = scheme + url 161 | parsed = (scheme,) + parsed 162 | split = (scheme,) + split 163 | self.checkRoundtrips(url, parsed, split) 164 | 165 | def checkJoin(self, base, relurl, expected): 166 | self.assertEqual(urlparse.urljoin(base, relurl), expected, 167 | (base, relurl, expected)) 168 | 169 | def test_unparse_parse(self): 170 | for u in ['Python', './Python', 'x-newscheme://foo.com/stuff', 'x://y', 'x:/y', 'x:/', '/', ]: 171 | self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u) 172 | self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u) 173 | 174 | def test_RFC1808(self): 175 | # "normal" cases from RFC 1808: 176 | self.checkJoin(RFC1808_BASE, 'g:h', 'g:h') 177 | self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g') 178 | self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g') 179 | self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/') 180 | self.checkJoin(RFC1808_BASE, '/g', 'http://a/g') 181 | self.checkJoin(RFC1808_BASE, '//g', 'http://g') 182 | self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y') 183 | self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') 184 | self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s') 185 | self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s') 186 | self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') 187 | self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s') 188 | self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x') 189 | self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') 190 | self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/') 191 | self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/') 192 | 
self.checkJoin(RFC1808_BASE, '..', 'http://a/b/') 193 | self.checkJoin(RFC1808_BASE, '../', 'http://a/b/') 194 | self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g') 195 | self.checkJoin(RFC1808_BASE, '../..', 'http://a/') 196 | self.checkJoin(RFC1808_BASE, '../../', 'http://a/') 197 | self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g') 198 | 199 | # "abnormal" cases from RFC 1808: 200 | self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') 201 | self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') 202 | self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') 203 | self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') 204 | self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') 205 | self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') 206 | self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g') 207 | self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..') 208 | self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g') 209 | self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g') 210 | self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/') 211 | self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h') 212 | self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h') 213 | 214 | # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808), 215 | # so we'll not actually run these tests (which expect 1808 behavior). 216 | # self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') 217 | # self.checkJoin(RFC1808_BASE, 'http:', 'http:') 218 | 219 | def test_RFC2396(self): 220 | # cases from RFC 2396 221 | 222 | self.checkJoin(RFC2396_BASE, 'g:h', 'g:h') 223 | self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g') 224 | self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g') 225 | self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/') 226 | self.checkJoin(RFC2396_BASE, '/g', 'http://a/g') 227 | self.checkJoin(RFC2396_BASE, '//g', 'http://g') 228 | self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y') 229 | self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s') 230 | self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s') 231 | self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s') 232 | self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x') 233 | self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') 234 | self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/') 235 | self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/') 236 | self.checkJoin(RFC2396_BASE, '..', 'http://a/b/') 237 | self.checkJoin(RFC2396_BASE, '../', 'http://a/b/') 238 | self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g') 239 | self.checkJoin(RFC2396_BASE, '../..', 'http://a/') 240 | self.checkJoin(RFC2396_BASE, '../../', 'http://a/') 241 | self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') 242 | self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) 243 | self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') 244 | self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') 245 | self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') 246 | self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') 247 | self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') 248 | self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g') 249 | self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..') 250 | self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g') 251 | self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g') 252 | self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/') 253 | self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h') 254 | self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h') 255 | 
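# Path parameters (';x=1') belong to their path segment, so dot-segment resolution keeps or
# removes them together with that segment: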
self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') 256 | self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') 257 | self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') 258 | self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') 259 | self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') 260 | self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') 261 | 262 | def test_RFC3986(self): 263 | # Test cases from RFC3986 264 | self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y') 265 | self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x') 266 | self.checkJoin(RFC3986_BASE, 'g:h', 'g:h') 267 | self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g') 268 | self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g') 269 | self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/') 270 | self.checkJoin(RFC3986_BASE, '/g', 'http://a/g') 271 | self.checkJoin(RFC3986_BASE, '//g', 'http://g') 272 | self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y') 273 | self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y') 274 | self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s') 275 | self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s') 276 | self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s') 277 | self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x') 278 | self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x') 279 | self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') 280 | self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q') 281 | self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/') 282 | self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/') 283 | self.checkJoin(RFC3986_BASE, '..', 'http://a/b/') 284 | self.checkJoin(RFC3986_BASE, '../', 'http://a/b/') 285 | self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g') 286 | self.checkJoin(RFC3986_BASE, '../..', 'http://a/') 287 | self.checkJoin(RFC3986_BASE, '../../', 'http://a/') 288 | self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g') 289 | 290 | # Abnormal Examples 291 | 292 | # The 'abnormal scenarios' are incompatible with RFC2986 parsing 293 | # Tests are here for reference. 
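# Under RFC 3986's 'abnormal examples', surplus '..' segments and leading '/./' or '/../'
# are collapsed away, so all four joins below would resolve to 'http://a/g'; this relaxed
# parser instead preserves them, matching the RFC 1808/2396 expectations tested above.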
294 | 295 | # self.checkJoin(RFC3986_BASE, '../../../g','http://a/g') 296 | # self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g') 297 | # self.checkJoin(RFC3986_BASE, '/./g','http://a/g') 298 | # self.checkJoin(RFC3986_BASE, '/../g','http://a/g') 299 | 300 | self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.') 301 | self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g') 302 | self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..') 303 | self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g') 304 | self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g') 305 | self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/') 306 | self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h') 307 | self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h') 308 | self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') 309 | self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y') 310 | self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') 311 | self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') 312 | self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') 313 | self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') 314 | # self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser 315 | self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g') # relaxed parser 316 | 317 | def test_urljoins(self): 318 | self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h') 319 | self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g') 320 | self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d') 321 | self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g') 322 | self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g') 323 | self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/') 324 | self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g') 325 | self.checkJoin(SIMPLE_BASE, '//g', 'http://g') 326 | self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y') 327 | self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y') 328 | self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') 329 | self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/') 330 | self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/') 331 | self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/') 332 | self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/') 333 | self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g') 334 | self.checkJoin(SIMPLE_BASE, '../..', 'http://a/') 335 | self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g') 336 | self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g') 337 | self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g') 338 | self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/') 339 | self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g') 340 | self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h') 341 | self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h') 342 | self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g') 343 | self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d') 344 | self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y') 345 | self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y') 346 | self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x') 347 | 348 | def test_urldefrag(self): 349 | for url, defrag, frag in [ 350 | ('http://python.org#frag', 'http://python.org', 'frag'), 351 | ('http://python.org', 'http://python.org', ''), 352 | ('http://python.org/#frag', 'http://python.org/', 'frag'), 353 | ('http://python.org/', 'http://python.org/', ''), 354 | ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), 355 | ('http://python.org/?q', 'http://python.org/?q', ''), 356 | 
('http://python.org/p#frag', 'http://python.org/p', 'frag'), 357 | ('http://python.org/p?q', 'http://python.org/p?q', ''), 358 | (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), 359 | (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), 360 | ]: 361 | self.assertEqual(urlparse.urldefrag(url), (defrag, frag)) 362 | 363 | def test_urlsplit_attributes(self): 364 | url = "HTTP://WWW.PYTHON.ORG/doc/#frag" 365 | p = urlparse.urlsplit(url) 366 | self.assertEqual(p.scheme, "http") 367 | self.assertEqual(p.netloc, "WWW.PYTHON.ORG") 368 | self.assertEqual(p.path, "/doc/") 369 | self.assertEqual(p.query, "") 370 | self.assertEqual(p.fragment, "frag") 371 | self.assertEqual(p.username, None) 372 | self.assertEqual(p.password, None) 373 | self.assertEqual(p.hostname, "WWW.PYTHON.ORG") 374 | self.assertEqual(p.port, None) 375 | # geturl() won't return exactly the original URL in this case 376 | # since the scheme is always case-normalized 377 | # self.assertEqual(p.geturl(), url) 378 | 379 | url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" 380 | p = urlparse.urlsplit(url) 381 | self.assertEqual(p.scheme, "http") 382 | self.assertEqual(p.netloc, "User:Pass@www.python.org:080") 383 | self.assertEqual(p.path, "/doc/") 384 | self.assertEqual(p.query, "query=yes") 385 | self.assertEqual(p.fragment, "frag") 386 | self.assertEqual(p.username, "User") 387 | self.assertEqual(p.password, "Pass") 388 | self.assertEqual(p.hostname, "www.python.org") 389 | self.assertEqual(p.port, 80) 390 | self.assertEqual(p.geturl(), url) 391 | 392 | # Addressing issue1698, which suggests Username can contain 393 | # "@" characters. Though not RFC compliant, many ftp sites allow 394 | # and request email addresses as usernames. 395 | 396 | url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" 397 | p = urlparse.urlsplit(url) 398 | self.assertEqual(p.scheme, "http") 399 | self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080") 400 | self.assertEqual(p.path, "/doc/") 401 | self.assertEqual(p.query, "query=yes") 402 | self.assertEqual(p.fragment, "frag") 403 | self.assertEqual(p.username, "User@example.com") 404 | self.assertEqual(p.password, "Pass") 405 | self.assertEqual(p.hostname, "www.python.org") 406 | self.assertEqual(p.port, 80) 407 | self.assertEqual(p.geturl(), url) 408 | 409 | def test_attributes_bad_port(self): 410 | """Check handling of non-integer ports.""" 411 | p = urlparse.urlsplit("http://www.example.net:foo") 412 | self.assertEqual(p.netloc, "www.example.net:foo") 413 | self.assertRaises(ValueError, lambda: p.port) 414 | 415 | p = urlparse.urlparse("http://www.example.net:foo") 416 | self.assertEqual(p.netloc, "www.example.net:foo") 417 | self.assertRaises(ValueError, lambda: p.port) 418 | 419 | def test_attributes_without_netloc(self): 420 | # This example is straight from RFC 3261. It looks like it 421 | # should allow the username, hostname, and port to be filled 422 | # in, but doesn't. Since it's a URI and doesn't use the 423 | # scheme://netloc syntax, the netloc and related attributes 424 | # should be left empty. 
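# (Without '//' after the scheme, everything up to '?' or '#' is parsed as the path, so
# there is no netloc to split into username/password/hostname/port.)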
425 | uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" 426 | p = urlparse.urlsplit(uri) 427 | self.assertEqual(p.netloc, None) 428 | self.assertEqual(p.username, None) 429 | self.assertEqual(p.password, None) 430 | self.assertEqual(p.hostname, None) 431 | self.assertEqual(p.port, None) 432 | self.assertEqual(p.geturl(), uri) 433 | 434 | p = urlparse.urlparse(uri) 435 | self.assertEqual(p.netloc, None) 436 | self.assertEqual(p.username, None) 437 | self.assertEqual(p.password, None) 438 | self.assertEqual(p.hostname, None) 439 | self.assertEqual(p.port, None) 440 | self.assertEqual(p.geturl(), uri) 441 | 442 | def test_caching(self): 443 | # Test case for bug #1313119 444 | uri = "http://example.com/doc/" 445 | unicode_uri = str(uri) 446 | 447 | urlparse.urlparse(unicode_uri) 448 | p = urlparse.urlparse(uri) 449 | self.assertEqual(type(p.scheme), type(uri)) 450 | self.assertEqual(type(p.hostname), type(uri)) 451 | self.assertEqual(type(p.path), type(uri)) 452 | 453 | def test_noslash(self): 454 | # Issue 1637: http://foo.com?query is legal 455 | self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"), 456 | ('http', 'example.com', '', '', 'blahblah=/foo', '')) 457 | 458 | def test_anyscheme(self): 459 | # Issue 7904: s3://foo.com/stuff has netloc "foo.com". 460 | self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"), 461 | ('s3', 'foo.com', '/stuff', '', '', '')) 462 | self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"), 463 | ('x-newscheme', 'foo.com', '/stuff', '', '', '')) 464 | 465 | def test_split_relative_urls(self): 466 | self.assertEqual(urlparse.urlparse("x-newscheme:stuff"), 467 | ('x-newscheme', None, 'stuff', '', '', '')) 468 | self.assertEqual(urlparse.urlparse("x-newscheme:/stuff"), 469 | ('x-newscheme', None, '/stuff', '', '', '')) 470 | self.assertEqual(urlparse.urlparse("x-newscheme://stuff"), 471 | ('x-newscheme', 'stuff', '', '', '', '')) 472 | self.assertEqual(urlparse.urlparse("x-newscheme:///stuff"), 473 | ('x-newscheme', '', '/stuff', '', '', '')) 474 | 475 | def test_unsplit_relative_urls(self): 476 | self.assertEqual(urlparse.urlunparse(('x-newscheme', None, 'stuff', '', '', '')), 477 | "x-newscheme:stuff") 478 | self.assertEqual(urlparse.urlunparse(('x-newscheme', None, '/stuff', '', '', '')), 479 | "x-newscheme:/stuff") 480 | self.assertEqual(urlparse.urlunparse(('x-newscheme', 'stuff', '', '', '', '')), 481 | "x-newscheme://stuff") 482 | self.assertEqual(urlparse.urlunparse(('x-newscheme', '', '/stuff', '', '', '')), 483 | "x-newscheme:///stuff") 484 | 485 | # vim:et:sts=4:ts=4 486 | -------------------------------------------------------------------------------- /tests/doc_test.py: -------------------------------------------------------------------------------- 1 | def test_docs(): 2 | from doctest import testfile 3 | failures, _ = testfile('README.md', module_relative=False, encoding='UTF-8') 4 | assert not failures 5 | -------------------------------------------------------------------------------- /tests/encoding_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import yelp_uri.encoding as E 4 | from yelp_uri.urllib_utf8 import quote 5 | 6 | 7 | def test_uri_error(): 8 | # Exception handlers around recode catch UnicodeError 9 | assert issubclass(E.MalformedUrlError, UnicodeError), type.mro(E.MalformedUrlError) 10 | 11 | 12 | def test_bad_port(): 13 | try: 14 | E.encode_uri('http://foo.bar:buz') 15 | except E.MalformedUrlError as error: 16 | assert 
error.args == ("Invalid port number: invalid literal for int() with base 10: 'buz'",) 17 | 18 | 19 | def test_bad_domain_segment_too_long(): 20 | try: 21 | E.encode_uri('http://foo.%s.bar' % ('x' * 64)) 22 | except E.MalformedUrlError as error: 23 | error_msg = ( 24 | "Invalid hostname: encoding with 'IDNA' codec failed " 25 | "(UnicodeError: label empty or too long): " 26 | ) 27 | 28 | assert error.args == ( 29 | error_msg + 30 | repr("foo.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.bar"), 31 | ) 32 | 33 | 34 | def test_bad_domain_extra_dots(): 35 | # We normalize this one ala Chrome browser 36 | assert E.encode_uri('http://..foo..com../.bar.') == 'http://foo.com/.bar.' 37 | 38 | 39 | def test_recode_none_raises_attribute_error(): 40 | with pytest.raises(AttributeError): 41 | E.recode_uri(None) 42 | 43 | 44 | def test_unicode_url_gets_quoted(): 45 | url = 'http://www.yelp.com/münchen' 46 | assert E.recode_uri(url) == 'http://www.yelp.com/m%C3%BCnchen' 47 | 48 | 49 | def test_mixed_quoting_url(): 50 | """Test that a url with mixed quoting has uniform quoting after requoting""" 51 | url = 'http://www.yelp.com/m%C3%BCnchen/münchen' 52 | assert E.recode_uri(url) == 'http://www.yelp.com/m%C3%BCnchen/m%C3%BCnchen' 53 | 54 | 55 | def test_mixed_quoting_param(): 56 | """Tests that a url with mixed quoting in the parameters has uniform quoting after requoting""" 57 | url = 'http://www.yelp.com?m%C3%BCnchen=münchen' 58 | assert E.recode_uri(url) == 'http://www.yelp.com?m%C3%BCnchen=m%C3%BCnchen' 59 | 60 | 61 | def test_mixed_encoding(): 62 | """Tests that a url with mixed encoding has uniform encoding after recoding""" 63 | url = 'http://www.yelp.com/m%C3%BCnchen?m%FCnchen' 64 | assert E.recode_uri(url) == 'http://www.yelp.com/m%C3%BCnchen?m%C3%BCnchen' 65 | 66 | 67 | def test_mixed_quoting_multiple_queries(): 68 | """Tests that a url with mixed quoting in multiple parameters has uniform quoting after requoting""" 69 | url = 'http://yelp.com/münchen/m%C3%BCnchen?münchen=m%C3%BCnchen&htmlchars=<">' 70 | assert E.recode_uri(url) == \ 71 | 'http://yelp.com/m%C3%BCnchen/m%C3%BCnchen?m%C3%BCnchen=m%C3%BCnchen&htmlchars=%3C%22%3E' 72 | 73 | 74 | def test_utf8_url(): 75 | """Tests that a url with mixed quoting in multiple parameters has uniform quoting after requoting""" 76 | url = 'http://yelp.com/münchen/m%C3%BCnchen?münchen=m%C3%BCnchen&htmlchars=<">'.encode() 77 | assert E.recode_uri(url) == \ 78 | 'http://yelp.com/m%C3%BCnchen/m%C3%BCnchen?m%C3%BCnchen=m%C3%BCnchen&htmlchars=%3C%22%3E' 79 | 80 | 81 | def test_multiple_escapes(): 82 | url = 'http://münch.com?zero=münch&one=m%C3%BCnch&two=m%25C3%25BCnch&three=m%2525C3%2525BCnch' 83 | assert E.recode_uri(url) == \ 84 | 'http://xn--mnch-0ra.com?zero=m%C3%BCnch&one=m%C3%BCnch&two=m%25C3%25BCnch&three=m%2525C3%2525BCnch' 85 | 86 | 87 | def test_url_reserved_chars(): 88 | url = 'http://www.yelp.com?chars=%s' % quote(':/?&=') 89 | assert E.recode_uri(url) == url 90 | 91 | 92 | def test_multi_params_for_individual_path_segment(): 93 | # Nothing (overly) strange in this url: nothing should be escaped 94 | url = '/foo;bar;baz/barney;fred;wilma' 95 | assert E.recode_uri(url) == url 96 | 97 | 98 | def test_url_with_params(): 99 | url = ( 100 | 'http://ad.doubleclick.net/clk;217976351;41128009;f?' 101 | 'http%3A//www.24hourfitness.com/FindClubDetail.do?' 
102 | 'clubid=189&edit=null&semiPromoCode=null&cm_mmc=' 103 | 'Yelp-_-ClubPage-_-BusinessListing-_-Link' 104 | ) 105 | assert E.recode_uri(url) == url 106 | 107 | 108 | def test_url_with_hashbang(): 109 | # For a discussion of url hashbangs, see: http://www.jenitennison.com/blog/node/154 110 | url = 'https://twitter.com/#!/YelpCincy/statuses/179565284020060161' 111 | assert E.recode_uri(url) == url 112 | 113 | 114 | def test_url_with_colon(): 115 | # Ticket: 31242 116 | url = 'http://www.yelp.fr/biz/smalls-marseille#hrid:u_UQvMf97E8pD4HEb59uIw' 117 | assert E.recode_uri(url) == url 118 | 119 | 120 | def test_param_xss(): 121 | assert E.recode_uri('/foo;