├── .gitignore ├── .travis.yml ├── AUTHORS ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── build_docs.sh ├── conf.py └── index.rst ├── makefile ├── purl ├── __init__.py ├── template.py └── url.py ├── pytest.ini ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_expansion.py ├── test_template.py ├── test_url.py └── test_utils.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | *.pyc 3 | .tox 4 | __pycache__/ 5 | dist/* 6 | docs/_build/* 7 | build 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 2.7 4 | - 3.6 5 | - 3.7 6 | - 3.8 7 | - pypy 8 | - pypy3 9 | install: 10 | - make install 11 | script: 12 | - make test 13 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | David Winterbottom 2 | 3 | Contributors: 4 | 5 | Wolfgang Langner 6 | xrotwang (https://github.com/xrotwang) 7 | Przemysław Hejman (https://github.com/mieciu) 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2012 purl authors (see AUTHORS file) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this 
permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst LICENSE 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ================================ 2 | purl - A simple Python URL class 3 | ================================ 4 | 5 | A simple, immutable URL class with a clean API for interrogation and 6 | manipulation. Supports Pythons 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8 and pypy. 7 | 8 | Also supports template URLs as per `RFC 6570`_ 9 | 10 | Contents: 11 | 12 | .. contents:: :local: 13 | :depth: 1 14 | 15 | .. image:: https://secure.travis-ci.org/codeinthehole/purl.png 16 | :target: https://travis-ci.org/codeinthehole/purl 17 | 18 | .. image:: https://img.shields.io/pypi/v/purl.svg 19 | :target: https://crate.io/packages/purl/ 20 | 21 | .. _`RFC 6570`: http://tools.ietf.org/html/rfc6570 22 | 23 | Docs 24 | ---- 25 | 26 | http://purl.readthedocs.org/en/latest/ 27 | 28 | Install 29 | ------- 30 | 31 | From PyPI (stable):: 32 | 33 | $ pip install purl 34 | 35 | From Github (unstable):: 36 | 37 | $ pip install git+git://github.com/codeinthehole/purl.git#egg=purl 38 | 39 | Use 40 | --- 41 | 42 | Construct: 43 | 44 | .. 
code:: python 45 | 46 | >>> from purl import URL 47 | 48 | # String constructor 49 | >>> from_str = URL('https://www.google.com/search?q=testing') 50 | 51 | # Keyword constructor 52 | >>> from_kwargs = URL(scheme='https', host='www.google.com', path='/search', query='q=testing') 53 | 54 | # Combine 55 | >>> from_combo = URL('https://www.google.com').path('search').query_param('q', 'testing') 56 | 57 | URL objects are immutable - all mutator methods return a new instance. 58 | 59 | Interrogate: 60 | 61 | .. code:: python 62 | 63 | >>> u = URL('https://www.google.com/search?q=testing') 64 | >>> u.scheme() 65 | 'https' 66 | >>> u.host() 67 | 'www.google.com' 68 | >>> u.domain() 69 | 'www.google.com' 70 | >>> u.username() 71 | >>> u.password() 72 | >>> u.netloc() 73 | 'www.google.com' 74 | >>> u.port() 75 | >>> u.path() 76 | '/search' 77 | >>> u.query() 78 | 'q=testing' 79 | >>> u.fragment() 80 | '' 81 | >>> u.path_segment(0) 82 | 'search' 83 | >>> u.path_segments() 84 | ('search',) 85 | >>> u.query_param('q') 86 | 'testing' 87 | >>> u.query_param('q', as_list=True) 88 | ['testing'] 89 | >>> u.query_param('lang', default='GB') 90 | 'GB' 91 | >>> u.query_params() 92 | {'q': ['testing']} 93 | >>> u.has_query_param('q') 94 | True 95 | >>> u.has_query_params(('q', 'r')) 96 | False 97 | >>> u.subdomains() 98 | ['www', 'google', 'com'] 99 | >>> u.subdomain(0) 100 | 'www' 101 | 102 | Note that each accessor method is overloaded to be a mutator method too, similar 103 | to the jQuery API. Eg: 104 | 105 | .. code:: python 106 | 107 | >>> u = URL.from_string('https://github.com/codeinthehole') 108 | 109 | # Access 110 | >>> u.path_segment(0) 111 | 'codeinthehole' 112 | 113 | # Mutate (creates a new instance) 114 | >>> new_url = u.path_segment(0, 'tangentlabs') 115 | >>> new_url is u 116 | False 117 | >>> new_url.path_segment(0) 118 | 'tangentlabs' 119 | 120 | Hence, you can build a URL up in steps: 121 | 122 | .. 
code:: python 123 | 124 | >>> u = URL().scheme('http').domain('www.example.com').path('/some/path').query_param('q', 'search term') 125 | >>> u.as_string() 126 | 'http://www.example.com/some/path?q=search+term' 127 | 128 | Along with the above overloaded methods, there is also an ``add_path_segment`` 129 | method for adding a segment at the end of the current path: 130 | 131 | .. code:: python 132 | 133 | >>> new_url = u.add_path_segment('here') 134 | >>> new_url.as_string() 135 | 'http://www.example.com/some/path/here?q=search+term' 136 | 137 | Couple of other things: 138 | 139 | * Since the URL class is immutable it can be used as a key in a dictionary 140 | * It can be pickled and restored 141 | * It supports equality operations 142 | * It supports equality operations 143 | 144 | URL templates can be used either via a ``Template`` class: 145 | 146 | .. code:: python 147 | 148 | >>> from purl import Template 149 | >>> tpl = Template("http://example.com{/list*}") 150 | >>> url = tpl.expand({'list': ['red', 'green', 'blue']}) 151 | >>> url.as_string() 152 | 'http://example.com/red/green/blue' 153 | 154 | or the ``expand`` function: 155 | 156 | .. code:: python 157 | 158 | >>> from purl import expand 159 | >>> expand(u"{/list*}", {'list': ['red', 'green', 'blue']}) 160 | '/red/green/blue' 161 | 162 | A wide variety of expansions are possible - refer to the RFC_ for more details. 163 | 164 | .. _RFC: http://tools.ietf.org/html/rfc6570 165 | 166 | Changelog 167 | --------- 168 | 169 | v1.6 - 2021-05-15 170 | ~~~~~~~~~~~~~~~~~ 171 | 172 | * Use `pytest` instead of `nose`. 173 | * Fix warning around regex string. 174 | 175 | v1.5 - 2019-03-10 176 | ~~~~~~~~~~~~~~~~~ 177 | 178 | * Allow `@` in passwords. 179 | 180 | v1.4 - 2018-03-11 181 | ~~~~~~~~~~~~~~~~~ 182 | 183 | * Allow usernames and passwords to be removed from URLs. 184 | 185 | v1.3.1 186 | ~~~~~~ 187 | 188 | * Ensure paths always have a leading slash. 
189 | 190 | v1.3 191 | ~~~~ 192 | 193 | * Allow absolute URLs to be converted into relative. 194 | 195 | v1.2 196 | ~~~~ 197 | 198 | * Support password-less URLs. 199 | * Allow slashes to be passed as path segments. 200 | 201 | v1.1 202 | ~~~~ 203 | 204 | * Support setting username and password via mutator methods 205 | 206 | v1.0.3 207 | ~~~~~~ 208 | 209 | * Handle some unicode compatibility edge-cases 210 | 211 | v1.0.2 212 | ~~~~~~ 213 | 214 | * Fix template expansion bug with no matching variables being passed in. This 215 | ensures ``purl.Template`` works correctly with the URLs returned from the 216 | Github API. 217 | 218 | v1.0.1 219 | ~~~~~~ 220 | 221 | * Fix bug with special characters in paths not being escaped. 222 | 223 | v1.0 224 | ~~~~ 225 | 226 | * Slight tidy up. Document support for PyPy and Python 3.4. 227 | 228 | v0.8 229 | ~~~~ 230 | 231 | * Support for RFC 6570 URI templates 232 | 233 | v0.7 234 | ~~~~ 235 | 236 | * All internal strings are unicode. 237 | * Support for unicode chars in path, fragment, query, auth added. 
238 | 239 | v0.6 240 | ~~~~ 241 | 242 | * Added ``append_query_param`` method 243 | * Added ``remove_query_param`` method 244 | 245 | v0.5 246 | ~~~~ 247 | 248 | * Added support for Python 3.2/3.3 (thanks @pmcnr and @mitchellrj) 249 | 250 | v0.4.1 251 | ~~~~~~ 252 | 253 | * Added API docs 254 | * Added to readthedocs.org 255 | 256 | v0.4 257 | ~~~~ 258 | 259 | * Modified constructor to accept full URL string as first arg 260 | * Added ``add_path_segment`` method 261 | 262 | v0.3.2 263 | ~~~~~~ 264 | 265 | * Fixed bug port number in string when using from_string constructor 266 | 267 | v0.3.1 268 | ~~~~~~ 269 | 270 | * Fixed bug with passing lists to query param setter methods 271 | 272 | v0.3 273 | ~~~~ 274 | 275 | * Added support for comparison and equality 276 | * Added support for pickling 277 | * Added ``__slots__`` so instances can be used as keys within dictionaries 278 | 279 | Contribute 280 | ---------- 281 | 282 | Clone, create a virtualenv then install purl and the packages required for 283 | testing:: 284 | 285 | $ git clone git@github.com:codeinthehole/purl.git 286 | $ cd purl 287 | $ mkvirtualenv purl # requires virtualenvwrapper 288 | (purl) $ make 289 | 290 | Ensure tests pass using:: 291 | 292 | (purl) $ pytest 293 | 294 | or:: 295 | 296 | $ tox 297 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. 
The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/purl.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/purl.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/purl" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/purl" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /docs/build_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | make html 3 | open _build/html/index.html -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # purl documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Jun 14 15:41:49 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | sys.path.insert(0, os.path.abspath('..')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 
40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | import datetime 44 | project = u'purl' 45 | copyright = u'%s, David Winterbottom' % datetime.date.today().year 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | 53 | from purl import __version__ 54 | version = __version__ 55 | # The full version, including alpha/beta/rc tags. 56 | release = __version__ 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | #language = None 61 | 62 | # There are two options for replacing |today|: either, you set today to some 63 | # non-false value, then it is used: 64 | #today = '' 65 | # Else, today_fmt is used as the format for a strftime call. 66 | #today_fmt = '%B %d, %Y' 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | exclude_patterns = ['_build'] 71 | 72 | # The reST default role (used for this markup: `text`) to use for all documents. 73 | #default_role = None 74 | 75 | # If true, '()' will be appended to :func: etc. cross-reference text. 76 | #add_function_parentheses = True 77 | 78 | # If true, the current module name will be prepended to all description 79 | # unit titles (such as .. function::). 80 | #add_module_names = True 81 | 82 | # If true, sectionauthor and moduleauthor directives will be shown in the 83 | # output. They are ignored by default. 84 | #show_authors = False 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = 'sphinx' 88 | 89 | # A list of ignored prefixes for module index sorting. 
90 | #modindex_common_prefix = [] 91 | 92 | 93 | # -- Options for HTML output --------------------------------------------------- 94 | 95 | # The theme to use for HTML and HTML Help pages. See the documentation for 96 | # a list of builtin themes. 97 | html_theme = 'default' 98 | 99 | # Theme options are theme-specific and customize the look and feel of a theme 100 | # further. For a list of options available for each theme, see the 101 | # documentation. 102 | #html_theme_options = {} 103 | 104 | # Add any paths that contain custom themes here, relative to this directory. 105 | #html_theme_path = [] 106 | 107 | # The name for this set of Sphinx documents. If None, it defaults to 108 | # "<project> v<release> documentation". 109 | #html_title = None 110 | 111 | # A shorter title for the navigation bar. Default is the same as html_title. 112 | #html_short_title = None 113 | 114 | # The name of an image file (relative to this directory) to place at the top 115 | # of the sidebar. 116 | #html_logo = None 117 | 118 | # The name of an image file (within the static path) to use as favicon of the 119 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 120 | # pixels large. 121 | #html_favicon = None 122 | 123 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 124 | # using the given strftime format. 125 | #html_last_updated_fmt = '%b %d, %Y' 126 | 127 | # If true, SmartyPants will be used to convert quotes and dashes to 128 | # typographically correct entities. 129 | #html_use_smartypants = True 130 | 131 | # Custom sidebar templates, maps document names to template names. 132 | #html_sidebars = {} 133 | 134 | # Additional templates that should be rendered to pages, maps page names to 135 | # template names. 136 | #html_additional_pages = {} 137 | 138 | # If false, no module index is generated. 139 | #html_domain_indices = True 140 | 141 | # If false, no index is generated. 
142 | #html_use_index = True 143 | 144 | # If true, the index is split into individual pages for each letter. 145 | #html_split_index = False 146 | 147 | # If true, links to the reST sources are added to the pages. 148 | #html_show_sourcelink = True 149 | 150 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 151 | #html_show_sphinx = True 152 | 153 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 154 | #html_show_copyright = True 155 | 156 | # If true, an OpenSearch description file will be output, and all pages will 157 | # contain a <link> tag referring to it. The value of this option must be the 158 | # base URL from which the finished HTML is served. 159 | #html_use_opensearch = '' 160 | 161 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 162 | #html_file_suffix = None 163 | 164 | # Output file base name for HTML help builder. 165 | htmlhelp_basename = 'purldoc' 166 | 167 | 168 | # -- Options for LaTeX output -------------------------------------------------- 169 | 170 | latex_elements = { 171 | # The paper size ('letterpaper' or 'a4paper'). 172 | #'papersize': 'letterpaper', 173 | 174 | # The font size ('10pt', '11pt' or '12pt'). 175 | #'pointsize': '10pt', 176 | 177 | # Additional stuff for the LaTeX preamble. 178 | #'preamble': '', 179 | } 180 | 181 | # Grouping the document tree into LaTeX files. List of tuples 182 | # (source start file, target name, title, author, documentclass [howto/manual]). 183 | latex_documents = [ 184 | ('index', 'purl.tex', u'purl Documentation', 185 | u'David Winterbottom', 'manual'), 186 | ] 187 | 188 | # The name of an image file (relative to this directory) to place at the top of 189 | # the title page. 190 | #latex_logo = None 191 | 192 | # For "manual" documents, if this is true, then toplevel headings are parts, 193 | # not chapters. 194 | #latex_use_parts = False 195 | 196 | # If true, show page references after internal links. 
197 | #latex_show_pagerefs = False 198 | 199 | # If true, show URL addresses after external links. 200 | #latex_show_urls = False 201 | 202 | # Documents to append as an appendix to all manuals. 203 | #latex_appendices = [] 204 | 205 | # If false, no module index is generated. 206 | #latex_domain_indices = True 207 | 208 | 209 | # -- Options for manual page output -------------------------------------------- 210 | 211 | # One entry per manual page. List of tuples 212 | # (source start file, name, description, authors, manual section). 213 | man_pages = [ 214 | ('index', 'purl', u'purl Documentation', 215 | [u'David Winterbottom'], 1) 216 | ] 217 | 218 | # If true, show URL addresses after external links. 219 | #man_show_urls = False 220 | 221 | 222 | # -- Options for Texinfo output ------------------------------------------------ 223 | 224 | # Grouping the document tree into Texinfo files. List of tuples 225 | # (source start file, target name, title, author, 226 | # dir menu entry, description, category) 227 | texinfo_documents = [ 228 | ('index', 'purl', u'purl Documentation', 229 | u'David Winterbottom', 'purl', 'One line description of project.', 230 | 'Miscellaneous'), 231 | ] 232 | 233 | # Documents to append as an appendix to all manuals. 234 | #texinfo_appendices = [] 235 | 236 | # If false, no module index is generated. 237 | #texinfo_domain_indices = True 238 | 239 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 240 | #texinfo_show_urls = 'footnote' 241 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. purl documentation master file, created by 2 | sphinx-quickstart on Thu Jun 14 15:41:49 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | ==== 7 | purl 8 | ==== 9 | 10 | :class:`purl.URL` is a simple, immutable URL class that can make your life 11 | easier. 12 | 13 | API 14 | === 15 | 16 | There are only two classes to be aware of. 17 | 18 | .. module:: purl 19 | 20 | .. autoclass:: URL 21 | :members: 22 | 23 | .. autoclass:: Template 24 | :members: 25 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | install: 2 | pip install -r requirements.txt 3 | python setup.py develop 4 | 5 | test: 6 | pytest 7 | 8 | package: clean 9 | # Test these packages in fresh virtualenvs: 10 | # $ pip install --no-index dist/purl-0.8.tar.gz 11 | # $ pip install --use-wheel --no-index --find-links dist purl 12 | ./setup.py sdist 13 | ./setup.py bdist_wheel 14 | 15 | release: 16 | ./setup.py sdist upload 17 | ./setup.py bdist_wheel upload 18 | git push --tags 19 | 20 | clean: 21 | -rm -rf dist/ build/ *.egg-info 22 | -------------------------------------------------------------------------------- /purl/__init__.py: -------------------------------------------------------------------------------- 1 | from .url import URL # noqa 2 | from .template import expand, Template # noqa 3 | 4 | __version__ = '1.6' 5 | 6 | __all__ = ['URL', 'expand', 'Template'] 7 | -------------------------------------------------------------------------------- /purl/template.py: -------------------------------------------------------------------------------- 1 | import re 2 | import functools 3 | 4 | try: 5 | from urllib.parse import quote 6 | except ImportError: 7 | # Python 2 8 | from urllib import quote 9 | 10 | from . 
__all__ = ['Template', 'expand']


# Matches one ``{expression}`` placeholder; group 1 is the text between the
# braces (optional operator character, variable names and modifiers).
patterns = re.compile(r"{([^\}]+)}")


class Template(object):
    """
    A URL template string that can be expanded into a URL object
    """

    def __init__(self, url_str):
        self._base = url_str

    def __str__(self):
        return 'Template: %s' % self._base

    def expand(self, variables=None):
        """
        Expand the template using the passed variables

        :param dict variables: mapping of variable name to value
        :returns: ``url.URL`` instance built from the expanded string
        """
        return url.URL(expand(self._base, variables))


def expand(template, variables=None):
    """
    Expand a URL template string using the passed variables

    :param string template: template containing ``{...}`` expressions
    :param dict variables: mapping of variable name to value; names that
        are missing from the mapping expand to nothing
    :returns: the expanded string
    """
    if variables is None:
        variables = {}
    return patterns.sub(functools.partial(_replace, variables), template)


# Utils

def _flatten(container):
    """
    _flatten a sequence of sequences into a single list
    """
    return [item for sequence in container for item in sequence]


def _is_empty(value):
    """
    Test if a value has no content: ``None`` or anything of length zero.

    Numbers have no length so ``0`` and ``False`` are real values, not
    empty ones.
    """
    if value is None:
        return True
    try:
        return len(value) == 0
    except TypeError:
        return False


def _is_pair_list(value):
    """
    Test if every item of a list/tuple is itself a two-item list or tuple.

    Checking the structure explicitly (rather than trying ``dict(value)``)
    stops a list of two-character strings such as ``['ab', 'cd']`` from
    being mistaken for key/value pairs.
    """
    return all(
        isinstance(item, (list, tuple)) and len(item) == 2 for item in value)


# Format functions
# ----------------
# These are responsible for formatting the (key, value) pair into a string


def _format_pair_no_equals(explode, separator, escape, key, value):
    """
    Format a key, value pair but don't include the equals sign
    when there is no value
    """
    if _is_empty(value):
        return key
    return _format_pair(explode, separator, escape, key, value)


def _format_pair_with_equals(explode, separator, escape, key, value):
    """
    Format a key, value pair including the equals sign
    when there is no value
    """
    if _is_empty(value):
        return key + '='
    return _format_pair(explode, separator, escape, key, value)


def _format_pair(explode, separator, escape, key, value):
    """
    Format a key, value pair as ``key=value``

    :param bool explode: whether the variable carried the ``*`` modifier
    :param string separator: join character used when exploding
    :param escape: function used to escape each value
    :param string key: the variable name
    :param value: a scalar, a list of scalars or a list of (key, value)
        pairs
    """
    if not isinstance(value, (list, tuple)):
        return '%s=%s' % (key, escape(value))

    join_char = separator if explode else ","
    if _is_pair_list(value):
        # Tuple container
        if explode:
            # Each pair is written under its own name, not the variable name
            return join_char.join(
                "%s=%s" % (k, escape(v)) for (k, v) in value)
        escaped_value = join_char.join(map(escape, _flatten(value)))
    else:
        # Scalar container
        if explode:
            # The variable name is repeated for every item
            return join_char.join(
                "%s=%s" % (key, escape(v)) for v in value)
        escaped_value = join_char.join(map(escape, value))
    return '%s=%s' % (key, escaped_value)


def _format_default(explode, separator, escape, key, value):
    """
    Format a value on its own (the variable name is never written)

    Takes the same arguments as :func:`_format_pair`; ``key`` is accepted
    only so that all format functions share one signature.
    """
    if not isinstance(value, (list, tuple)):
        return escape(value)

    join_char = separator if explode else ","
    if not _is_pair_list(value):
        # Scalar container
        return join_char.join(map(escape, value))
    # Tuple container
    if explode:
        return join_char.join("%s=%s" % (k, escape(v)) for (k, v) in value)
    return join_char.join(map(escape, _flatten(value)))


# Modifier functions
# ------------------
# These are responsible for modifying the variable before formatting

def _identity(x):
    return x


def _truncate(string, num_chars):
    return string[:num_chars]


# Splitting functions
# -------------------
# These are responsible for splitting a string into a sequence of (key,
# modifier) tuples


def _split_basic(string):
    """
    Split a string into a list of tuples of the form (key, modifier_fn,
    explode) where modifier_fn is a function that applies the appropriate
    modification to the variable.

    Empty variable names (as in ``{?}`` or after a trailing comma) are
    skipped.
    """
    tuples = []
    for word in string.split(','):
        if not word:
            # Nothing to expand; indexing the last character would fail
            continue
        # Attempt to split on colon
        parts = word.split(':', 2)
        key, modifier_fn, explode = parts[0], _identity, False
        if len(parts) > 1:
            modifier_fn = functools.partial(
                _truncate, num_chars=int(parts[1]))
        if word.endswith('*'):
            key = word[:-1]
            explode = True
        tuples.append((key, modifier_fn, explode))
    return tuples


def _split_operator(string):
    # Drop the leading operator character before splitting
    return _split_basic(string[1:])


# Escaping functions
# ------------------


def _escape_all(value):
    return url.unicode_quote(value, safe="")


def _escape_reserved(value):
    return url.unicode_quote(value, safe="/!,.;")


# Operator map
# ------------
# A mapping of:
#     operator -> (prefix, separator, split_fn, escape_fn, format_fn)
operator_map = {
    '+': ('', ',', _split_operator, _escape_reserved, _format_default),
    '#': ('#', ',', _split_operator, _escape_reserved, _format_default),
    '.': ('.', '.', _split_operator, _escape_all, _format_default),
    '/': ('/', '/', _split_operator, _escape_all, _format_default),
    ';': (';', ';', _split_operator, _escape_all, _format_pair_no_equals),
    '?': ('?', '&', _split_operator, _escape_all, _format_pair_with_equals),
    '&': ('&', '&', _split_operator, _escape_all, _format_pair_with_equals),
}
# Used when the expression does not start with an operator character
defaults = ('', ',', _split_basic, _escape_all, _format_default)


def _replace(variables, match):
    """
    Return the appropriate replacement for `match` using the passed variables
    """
    expression = match.group(1)

    # Look-up chars and functions for the specified operator
    (prefix_char, separator_char, split_fn, escape_fn,
     format_fn) = operator_map.get(expression[0], defaults)

    replacements = []
    for key, modify_fn, explode in split_fn(expression):
        if key in variables:
            variable = modify_fn(variables[key])
            replacement = format_fn(
                explode, separator_char, escape_fn, key, variable)
            replacements.append(replacement)
    if not replacements:
        return ''
    return prefix_char + separator_char.join(replacements)
# To minimise memory consumption, we use a namedtuple to store all instance
# variables, as well as using the __slots__ attribute.
_URLTuple = namedtuple(
    "_URLTuple", "host username password scheme port path query fragment")


# Encoding helpers


def to_unicode(string):
    """
    Ensure a passed string is unicode
    """
    if isinstance(string, six.binary_type):
        return string.decode('utf8')
    if isinstance(string, six.text_type):
        return string
    if six.PY2:
        return unicode(string)
    return str(string)


def to_utf8(string):
    """
    Encode a string as a UTF8 bytestring.  This function could be passed a
    bytestring or unicode string so must distinguish between the two.
    """
    if isinstance(string, six.text_type):
        return string.encode('utf8')
    if isinstance(string, six.binary_type):
        return string
    return str(string)


def dict_to_unicode(raw_dict):
    """
    Ensure all keys and values in a dict are unicode.

    The passed dict is assumed to have lists for all values.
    """
    decoded = {}
    for key, value in raw_dict.items():
        # Build a real list so the result is the same on Python 2 and 3
        decoded[to_unicode(key)] = [to_unicode(item) for item in value]
    return decoded


def unicode_quote(string, safe='/'):
    """
    %-encode a string, leaving the characters in ``safe`` untouched.

    ``None`` is passed through unchanged.
    """
    if string is None:
        return None
    return quote(to_utf8(string), to_utf8(safe))


def unicode_quote_path_segment(string):
    """
    %-encode a single path segment (no characters are treated as safe).

    ``None`` is passed through unchanged.
    """
    if string is None:
        return None
    return quote(to_utf8(string), safe=to_utf8(""))


def unicode_unquote(string):
    """
    Decode a %-encoded string to unicode.

    ``None`` is passed through unchanged.
    """
    if string is None:
        return None
    if six.PY3:
        return unquote(string)
    return to_unicode(unquote(to_utf8(string)))


def unicode_urlencode(query, doseq=True):
    """
    Custom wrapper around urlencode to support unicode

    Python urlencode doesn't handle unicode well so we need to convert to
    bytestrings before using it:
    http://stackoverflow.com/questions/6480723/urllib-urlencode-doesnt-like-unicode-values-how-about-this-workaround
    """
    pairs = []
    for key, value in query.items():
        # Encode each member of a multi-value param separately
        if isinstance(value, (list, tuple)):
            value = [to_utf8(item) for item in value]
        else:
            value = to_utf8(value)
        pairs.append((to_utf8(key), value))
    return urlencode(dict(pairs), doseq)


def parse(url_str):
    """
    Extract all parts from a URL string and return them as a dictionary
    """
    url_str = to_unicode(url_str)
    result = urlparse(url_str)

    username = password = None
    host = result.netloc
    if '@' in host:
        # Split on the last "@" so passwords may contain "@"
        credentials, host = host.rsplit('@', 1)
        # Split on the first ":" so passwords may contain ":"
        username, separator, secret = credentials.partition(':')
        if separator:
            password = secret

    if host.startswith('['):
        # Bracketed IPv6 literal: keep everything up to the closing bracket
        closing = host.find(']')
        if closing != -1:
            host = host[:closing + 1]
    elif ':' in host:
        # Strip the port
        host = host.split(':')[0]

    return {'host': host,
            'username': username,
            'password': password,
            'scheme': result.scheme,
            'port': result.port,
            'path': result.path,
            'query': result.query,
            'fragment': result.fragment}


class URL(object):
    """
    The constructor can be used in two ways:

    1. Pass a URL string::

        >>> URL('http://www.google.com/search?q=testing').as_string()
        'http://www.google.com/search?q=testing'

    2. Pass keyword arguments::

        >>> URL(host='www.google.com', path='/search', query='q=testing').as_string()
        'http://www.google.com/search?q=testing'

    If you pass both a URL string and keyword args, then the values of keyword
    args take precedence.
    """

    __slots__ = ("_tuple",)

    def __init__(self, url_str=None, host=None, username=None, password=None,
                 scheme=None, port=None, path=None, query=None, fragment=None):
        if url_str is not None:
            params = parse(url_str)
        else:
            # Defaults
            params = {'scheme': 'http',
                      'username': None,
                      'password': None,
                      'host': None,
                      'port': None,
                      'path': '/',
                      'query': None,
                      'fragment': None}

        # Ensure path starts with a slash
        if path and not path.startswith("/"):
            path = "/%s" % path

        # Kwargs override the url_str
        overrides = (('host', host),
                     ('username', username),
                     ('password', password),
                     ('scheme', scheme),
                     ('port', port),
                     ('path', path),
                     ('query', query),
                     ('fragment', fragment))
        for name, override in overrides:
            if override is not None:
                params[name] = override

        # Store the various components in %-encoded form
        self._tuple = _URLTuple(params['host'],
                                unicode_quote(params['username']),
                                unicode_quote(params['password']),
                                params['scheme'],
                                params['port'],
                                params['path'],
                                params['query'],
                                unicode_quote(params['fragment']))

    def __eq__(self, other):
        # Let Python fall back to its default for non-URL objects
        if not isinstance(other, URL):
            return NotImplemented
        return self._tuple == other._tuple

    def __ne__(self, other):
        if not isinstance(other, URL):
            return NotImplemented
        return self._tuple != other._tuple

    def __getstate__(self):
        return tuple(self._tuple)

    def __setstate__(self, state):
        self._tuple = _URLTuple(*state)

    def __hash__(self):
        return hash(self._tuple)

    def __repr__(self):
        return str(self._tuple)

    def __unicode__(self):
        url = self._tuple
        parts = ["%s://" % url.scheme if url.scheme else '',
                 self.netloc(),
                 url.path,
                 '?%s' % url.query if url.query else '',
                 '#%s' % url.fragment if url.fragment else '']
        if not url.host:
            # Relative URL: leave out the scheme and netloc
            return ''.join(parts[2:])
        return ''.join(parts)

    __str__ = as_string = __unicode__

    # Accessors / Mutators
    # These use the jQuery overloading style whereby they become mutators if
    # extra args are passed

    def netloc(self):
        """
        Return the netloc
        """
        url = self._tuple
        if url.username and url.password:
            netloc = '%s:%s@%s' % (url.username, url.password, url.host)
        elif url.username and not url.password:
            netloc = '%s@%s' % (url.username, url.host)
        else:
            netloc = url.host
        if url.port:
            netloc = '%s:%s' % (netloc, url.port)
        return netloc

    def host(self, value=None):
        """
        Return the host

        :param string value: new host string
        """
        if value is not None:
            return URL._mutate(self, host=value)
        return self._tuple.host

    domain = host

    def username(self, value=None):
        """
        Return or set the username

        :param string value: the new username to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            return URL._mutate(self, username=value)
        return unicode_unquote(self._tuple.username)

    def password(self, value=None):
        """
        Return or set the password

        :param string value: the new password to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            return URL._mutate(self, password=value)
        return unicode_unquote(self._tuple.password)

    def subdomains(self, value=None):
        """
        Returns a list of subdomains or set the subdomains and returns a
        new :class:`URL` instance.

        :param list value: a list of subdomains
        """
        if value is not None:
            return URL._mutate(self, host='.'.join(value))
        return self.host().split('.')

    def subdomain(self, index, value=None):
        """
        Return a subdomain or set a new value and return a new :class:`URL`
        instance.

        :param integer index: 0-indexed subdomain
        :param string value: New subdomain
        """
        if value is not None:
            subdomains = self.subdomains()
            subdomains[index] = value
            return URL._mutate(self, host='.'.join(subdomains))
        return self.subdomains()[index]

    def scheme(self, value=None):
        """
        Return or set the scheme.

        :param string value: the new scheme to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            return URL._mutate(self, scheme=value)
        return self._tuple.scheme

    def path(self, value=None):
        """
        Return or set the path

        :param string value: the new path to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            if not value.startswith('/'):
                value = '/' + value
            encoded_value = unicode_quote(value)
            return URL._mutate(self, path=encoded_value)
        return self._tuple.path

    def query(self, value=None):
        """
        Return or set the query string

        :param string value: the new query string to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            return URL._mutate(self, query=value)
        return self._tuple.query

    def port(self, value=None):
        """
        Return or set the port

        :param string value: the new port to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            return URL._mutate(self, port=value)
        return self._tuple.port

    def fragment(self, value=None):
        """
        Return or set the fragment (hash)

        :param string value: the new fragment to use
        :returns: string or new :class:`URL` instance
        """
        if value is not None:
            return URL._mutate(self, fragment=value)
        return unicode_unquote(self._tuple.fragment)

    def relative(self):
        """
        Return a relative URL object (eg strip the protocol and host)

        :returns: new :class:`URL` instance
        """
        return URL._mutate(self, scheme=None, host=None)

    # ====
    # Path
    # ====

    def path_segment(self, index, value=None, default=None):
        """
        Return the path segment at the given index

        :param integer index:
        :param string value: the new segment value
        :param string default: the default value to return if no path segment exists with the given index
        """
        if value is not None:
            # path_segments() returns decoded segments, so every segment
            # (not just the new one) is re-encoded when rebuilding the path
            segments = list(self.path_segments())
            segments[index] = value
            new_path = '/' + '/'.join(
                unicode_quote_path_segment(segment) for segment in segments)
            if self._tuple.path.endswith('/'):
                new_path += '/'
            return URL._mutate(self, path=new_path)
        try:
            return self.path_segments()[index]
        except IndexError:
            return default

    def path_segments(self, value=None):
        """
        Return the path segments

        :param list value: the new path segments to use
        """
        if value is not None:
            encoded_values = map(unicode_quote_path_segment, value)
            new_path = '/' + '/'.join(encoded_values)
            return URL._mutate(self, path=new_path)
        # Drop the empty string before the leading slash
        segments = self._tuple.path.split('/')[1:]
        if self._tuple.path.endswith('/'):
            # ... and the empty string after a trailing slash
            segments.pop()
        return tuple(unicode_unquote(segment) for segment in segments)

    def add_path_segment(self, value):
        """
        Add a new path segment to the end of the current string

        :param string value: the new path segment to use

        Example::

            >>> u = URL('http://example.com/foo/')
            >>> u.add_path_segment('bar').as_string()
            'http://example.com/foo/bar'
        """
        segments = self.path_segments() + (to_unicode(value),)
        return self.path_segments(segments)

    # ============
    # Query params
    # ============

    def has_query_param(self, key):
        """
        Test if a given query parameter is present

        :param string key: key to test for
        """
        return self.query_param(key) is not None

    def has_query_params(self, keys):
        """
        Test if a given set of query parameters are present

        :param list keys: keys to test for
        """
        return all(self.has_query_param(k) for k in keys)

    def query_param(self, key, value=None, default=None, as_list=False):
        """
        Return or set a query parameter for the given key

        The value can be a list.

        :param string key: key to look for
        :param string default: value to return if ``key`` isn't found
        :param boolean as_list: whether to return the values as a list
        :param string value: the new query parameter to use
        """
        parse_result = self.query_params()
        if value is not None:
            # Need to ensure all strings are unicode
            if isinstance(value, (list, tuple)):
                value = list(map(to_unicode, value))
            else:
                value = to_unicode(value)
            parse_result[to_unicode(key)] = value
            return URL._mutate(
                self, query=unicode_urlencode(parse_result, doseq=True))

        try:
            result = parse_result[key]
        except KeyError:
            return default
        if as_list:
            return result
        # A single value is unwrapped from its list
        return result[0] if len(result) == 1 else result

    def append_query_param(self, key, value):
        """
        Append a query parameter

        :param string key: The query param key
        :param string value: The new value
        """
        values = self.query_param(key, as_list=True, default=[])
        values.append(value)
        return self.query_param(key, values)

    def query_params(self, value=None):
        """
        Return or set a dictionary of query params

        :param dict value: new dictionary of values
        """
        if value is not None:
            return URL._mutate(self, query=unicode_urlencode(value, doseq=True))
        query = '' if self._tuple.query is None else self._tuple.query

        # In Python 2.6, urlparse needs a bytestring so we encode and then
        # decode the result.
        if not six.PY3:
            result = parse_qs(to_utf8(query), True)
            return dict_to_unicode(result)

        return parse_qs(query, True)

    def remove_query_param(self, key, value=None):
        """
        Remove a query param from a URL

        Set the value parameter if removing from a list.

        :param string key: The key to delete
        :param string value: The value of the param to delete (of more than one)
        :raises KeyError: if ``key`` is not a query parameter of this URL
        :raises ValueError: if ``value`` is not one of the values of ``key``
        """
        parse_result = self.query_params()
        if value is not None:
            index = parse_result[key].index(value)
            del parse_result[key][index]
        else:
            del parse_result[key]
        return URL._mutate(self, query=unicode_urlencode(parse_result, doseq=True))

    # =======
    # Helpers
    # =======

    @classmethod
    def _mutate(cls, url, **kwargs):
        args = url._tuple._asdict()
        # These fields are stored %-encoded and the constructor encodes
        # them again, so decode first to avoid double-encoding
        for name in ('username', 'password', 'fragment'):
            args[name] = unicode_unquote(args[name])
        args.update(kwargs)
        return cls(**args)

    @classmethod
    def from_string(cls, url_str):
        """
        Factory method to create a new instance based on a passed string

        This method is deprecated now
        """
        return cls(url_str)
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | 4 | setup( 5 | name='purl', 6 | version='1.6', 7 | description=( 8 | "An immutable URL class for easy URL-building and manipulation"), 9 | long_description=open('README.rst').read(), 10 | url='https://github.com/codeinthehole/purl', 11 | license='MIT', 12 | author="David Winterbottom", 13 | author_email="david.winterbottom@gmail.com", 14 | packages=find_packages(exclude=['tests']), 15 | install_requires=['six'], 16 | include_package_data=True, 17 | classifiers=[ 18 | 'Development Status :: 5 - Production/Stable', 19 | 'Intended Audience :: Developers', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Programming Language :: Python :: 2', 22 | 'Programming Language :: Python', 23 | 'Programming Language :: Python :: 2.6', 24 | 'Programming Language :: Python :: 2.7', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.5', 27 | 'Programming Language :: Python :: 3.6', 28 | 'Programming Language :: Python :: 3.7', 29 | 'Programming Language :: Python :: 3.8', 30 | 'Programming Language :: Python :: Implementation :: PyPy', 31 | 'Topic :: Software Development :: Libraries :: Python Modules', 32 | ], 33 | ) 34 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeinthehole/purl/2bd51cabecfd4dcd20544fba7092cfd98dc7dac0/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_expansion.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import collections 3 | 4 | import pytest 5 | 6 | from purl.template 
import expand 7 | 8 | # Define variables as in the RFC (http://tools.ietf.org/html/rfc6570) 9 | level1_vars = { 10 | 'var': 'value', 11 | 'hello': 'Hello World!', 12 | } 13 | level2_vars = level1_vars.copy() 14 | level2_vars.update({ 15 | 'path': '/foo/bar' 16 | }) 17 | level3_vars = level2_vars.copy() 18 | level3_vars.update({ 19 | 'empty': '', 20 | 'x': '1024', 21 | 'y': '768' 22 | }) 23 | level4_vars = level2_vars.copy() 24 | level4_vars.update({ 25 | 'list': ['red', 'green', 'blue'], 26 | 'keys': [('semi', ';'), ('dot', '.'), ('comma', ',')] 27 | }) 28 | 29 | data = [ 30 | # Level 1 31 | ('{var}', level1_vars, 'value'), 32 | ('{hello}', level1_vars, 'Hello%20World%21'), 33 | # Level 2 - reserved expansion 34 | ('{+var}', level2_vars, 'value'), 35 | ('{+hello}', level2_vars, 'Hello%20World!'), 36 | ('{+path}/here', level2_vars, '/foo/bar/here'), 37 | ('here?ref={+path}', level2_vars, 'here?ref=/foo/bar'), 38 | # Level 2 - fragment expansion 39 | ('X{#var}', level2_vars, 'X#value'), 40 | ('X{#hello}', level2_vars, 'X#Hello%20World!'), 41 | # Level 3 - string expansion with multiple variables 42 | ('map?{x,y}', level3_vars, 'map?1024,768'), 43 | ('{x,hello,y}', level3_vars, '1024,Hello%20World%21,768'), 44 | # Level 3 - reserved expansion with multiple variables 45 | ('{+x,hello,y}', level3_vars, '1024,Hello%20World!,768'), 46 | ('{+path,x}/here', level3_vars, '/foo/bar,1024/here'), 47 | # Level 3 - fragment expansion with multiple variables 48 | ('{#x,hello,y}', level3_vars, '#1024,Hello%20World!,768'), 49 | ('{#path,x}/here', level3_vars, '#/foo/bar,1024/here'), 50 | # Level 3 - label expansion 51 | ('X{.var}', level3_vars, 'X.value'), 52 | ('X{.x,y}', level3_vars, 'X.1024.768'), 53 | # Level 3 - path segments, slash prefixed 54 | ('{/var}', level3_vars, '/value'), 55 | ('{/nokey}', level3_vars, ''), 56 | ('{/var,x}/here', level3_vars, '/value/1024/here'), 57 | # Level 3 - path segments, semi-colon prefixed 58 | ('{;x,y}', level3_vars, ';x=1024;y=768'), 59 | 
('{;x,y,empty}', level3_vars, ';x=1024;y=768;empty'), 60 | # Level 3 - form-style query, ampersand-separated 61 | ('{?x,y}', level3_vars, '?x=1024&y=768'), 62 | ('{?x,y,empty}', level3_vars, '?x=1024&y=768&empty='), 63 | # Level 3 - form-style query continuation 64 | ('?fixed=yes{&x}', level3_vars, '?fixed=yes&x=1024'), 65 | ('{&x,y,empty}', level3_vars, '&x=1024&y=768&empty='), 66 | # Level 4 - string expansion with value modifiers 67 | ('{var:3}', level4_vars, 'val'), 68 | ('{var:30}', level4_vars, 'value'), 69 | ('{list}', level4_vars, 'red,green,blue'), 70 | ('{list*}', level4_vars, 'red,green,blue'), 71 | ('{keys}', level4_vars, 'semi,%3B,dot,.,comma,%2C'), 72 | ('{keys*}', level4_vars, 'semi=%3B,dot=.,comma=%2C'), 73 | # Level 4 - reserved expansion with value modifiers 74 | ('{+path:6}/here', level4_vars, '/foo/b/here'), 75 | ('{+list}', level4_vars, 'red,green,blue'), 76 | ('{+list*}', level4_vars, 'red,green,blue'), 77 | ('{+keys}', level4_vars, 'semi,;,dot,.,comma,,'), 78 | ('{+keys*}', level4_vars, 'semi=;,dot=.,comma=,'), 79 | # Level 4 - fragment expansion with value modifiers 80 | ('{#path:6}/here', level4_vars, '#/foo/b/here'), 81 | ('{#list}', level4_vars, '#red,green,blue'), 82 | ('{#list*}', level4_vars, '#red,green,blue'), 83 | ('{#keys}', level4_vars, '#semi,;,dot,.,comma,,'), 84 | ('{#keys*}', level4_vars, '#semi=;,dot=.,comma=,'), 85 | # Level 4 - label expansion, dot-prefixed 86 | ('X{.var:3}', level4_vars, 'X.val'), 87 | ('X{.list}', level4_vars, 'X.red,green,blue'), 88 | ('X{.list*}', level4_vars, 'X.red.green.blue'), 89 | ('X{.keys}', level4_vars, 'X.semi,%3B,dot,.,comma,%2C'), 90 | ('X{.keys*}', level4_vars, 'X.semi=%3B.dot=..comma=%2C'), 91 | # Level 4 - path segments, slash-prefixed 92 | ('{/var:1,var}', level4_vars, '/v/value'), 93 | ('{/list}', level4_vars, '/red,green,blue'), 94 | ('{/list*}', level4_vars, '/red/green/blue'), 95 | ('{/list*,path:4}', level4_vars, '/red/green/blue/%2Ffoo'), 96 | ('{/keys}', level4_vars, 
'/semi,%3B,dot,.,comma,%2C'), 97 | ('{/keys*}', level4_vars, '/semi=%3B/dot=./comma=%2C'), 98 | # Level 4 - path-style parameters, semicolon-prefixed 99 | ('{;hello:5}', level4_vars, ';hello=Hello'), 100 | ('{;list}', level4_vars, ';list=red,green,blue'), 101 | ('{;list*}', level4_vars, ';list=red;list=green;list=blue'), 102 | ('{;keys}', level4_vars, ';keys=semi,%3B,dot,.,comma,%2C'), 103 | ('{;keys*}', level4_vars, ';semi=%3B;dot=.;comma=%2C'), 104 | # Level 4 - form-style query, ampersand-separated 105 | ('{?var:3}', level4_vars, '?var=val'), 106 | ('{?list}', level4_vars, '?list=red,green,blue'), 107 | ('{?list*}', level4_vars, '?list=red&list=green&list=blue'), 108 | ('{?keys}', level4_vars, '?keys=semi,%3B,dot,.,comma,%2C'), 109 | ('{?keys*}', level4_vars, '?semi=%3B&dot=.&comma=%2C'), 110 | # Level 4 - form-style query continuation 111 | ('{&var:3}', level4_vars, '&var=val'), 112 | ('{&list}', level4_vars, '&list=red,green,blue'), 113 | ('{&list*}', level4_vars, '&list=red&list=green&list=blue'), 114 | ('{&keys}', level4_vars, '&keys=semi,%3B,dot,.,comma,%2C'), 115 | ('{&keys*}', level4_vars, '&semi=%3B&dot=.&comma=%2C'), 116 | ] 117 | 118 | 119 | @pytest.mark.parametrize("template, fields, expected", data) 120 | def test_assert_expansion(template, fields, expected): 121 | assert expand(template, fields) == expected 122 | 123 | def test_unicode(): 124 | expand('{/name}', {'name': u'⚐ hello'}) 125 | -------------------------------------------------------------------------------- /tests/test_template.py: -------------------------------------------------------------------------------- 1 | import purl 2 | 3 | 4 | class TestTemplate: 5 | 6 | def test_basic_expansion(self): 7 | template = purl.Template('http://example.com{+path,x}/here') 8 | url = template.expand({'path': '/foo/bar', 'x': 1024}) 9 | assert 'http://example.com/foo/bar,1024/here' == url.as_string() 10 | 11 | def test_github_api_expansion(self): 12 | template = purl.Template( 13 | 
'https://api.github.com/repos/codeinthehole/purl/labels{/name}') 14 | url = template.expand() 15 | assert 'https://api.github.com/repos/codeinthehole/purl/labels' == url.as_string() 16 | -------------------------------------------------------------------------------- /tests/test_url.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | from purl import URL 4 | import pytest 5 | 6 | import pickle 7 | 8 | try: 9 | from urllib.parse import quote 10 | except ImportError: 11 | from urllib import quote 12 | 13 | 14 | class TestConstructor: 15 | def test_url_can_be_created_with_just_host(self): 16 | u = URL(host="google.com") 17 | assert "http://google.com/" == str(u) 18 | 19 | def test_url_can_be_created_with_host_and_schema(self): 20 | u = URL(host="google.com", scheme="https") 21 | assert "https://google.com/" == str(u) 22 | 23 | def test_url_can_be_created_with_host_and_post(self): 24 | u = URL(host="localhost", port=8000) 25 | assert "http://localhost:8000/" == str(u) 26 | 27 | def test_url_can_be_created_with_username_only(self): 28 | u = URL( 29 | scheme="postgres", 30 | username="user", 31 | host="127.0.0.1", 32 | port="5432", 33 | path="/db_name", 34 | ) 35 | assert "postgres://user@127.0.0.1:5432/db_name" == str(u) 36 | 37 | def test_no_args_to_constructor(self): 38 | u = URL() 39 | assert "/" == str(u) 40 | 41 | def test_as_string(self): 42 | assert "/" == URL().as_string() 43 | 44 | def test_full_url_can_be_used_as_first_param(self): 45 | u = URL("https://github.com") 46 | assert "https://github.com" == u.as_string() 47 | 48 | def test_kwargs_take_priority_when_used_with_full_url(self): 49 | u = URL("https://github.com", scheme="http") 50 | assert "http://github.com" == u.as_string() 51 | 52 | def test_creation_with_host_and_path(self): 53 | u = URL(host="localhost", path="boo") 54 | assert "http://localhost/boo" == str(u) 55 | 56 | def 
class TestEdgeCaseExtraction:
    """Parsing edge cases: bare query keys, repeated keys, credentials, ports."""

    def test_no_equals_sign_means_empty_string(self):
        url = URL.from_string("http://www.google.com/blog/article/1?q")
        assert "" == url.query_param("q")

    def test_list_extraction(self):
        url = URL.from_string("http://www.google.com/?q=1&q=2&q=3")
        # Compare against the extracted value.  The previous form,
        # ``assert [...], value``, asserted a non-empty list literal (always
        # truthy) and used the real value only as the failure message.
        assert ["1", "2", "3"] == url.query_param("q")

    def test_username_extraction(self):
        url = URL.from_string("ftp://user:pw@ftp.host")
        assert "user" == url.username()
        assert "pw" == url.password()

    def test_username_in_unicode_repr(self):
        u = "ftp://user:pw@ftp.host"
        url = URL.from_string(u)
        assert u == str(url)

    def test_auth_in_netloc(self):
        url = URL.from_string("ftp://user:pw@ftp.host")
        assert "user:pw@ftp.host" == url.netloc()

    def test_auth_with_special_char(self):
        # The password itself contains an "@"; only the last "@" separates
        # the credentials from the host.
        url = URL.from_string("ftp://user:b@z@ftp.host")
        assert "user" == url.username()
        assert "b@z" == url.password()

    def test_port_in_netloc(self):
        url = URL.from_string("http://localhost:5000")
        assert "localhost" == url.host()
        assert 5000 == url.port()

    def test_passwordless_netloc(self):
        url = URL.from_string("postgres://user@127.0.0.1:5432/db_name")
        assert "user" == url.username()
        assert url.password() is None

    def test_unicode_username_and_password(self):
        url = URL.from_string("postgres://jeść:niejeść@127.0.0.1:5432/db_name")
        assert "jeść" == url.username()
        assert "niejeść" == url.password()

    def test_unicode_username_only(self):
        url = URL.from_string("postgres://jeść@127.0.0.1:5432/db_name")
        assert "jeść" == url.username()
        assert url.password() is None

    def test_port_for_https_url(self):
        url = URL.from_string("https://github.com")
        # Identity check, matching the other ``is None`` assertions in this
        # file; ``None == x`` would defer to a custom ``__eq__``.
        assert url.port() is None
def test_subdomains(self):
    """The host is exposed as a list of its dot-separated labels."""
    # The previous ``assert [...], value`` form asserted a non-empty list
    # literal, which is always truthy, so the test could never fail.
    assert ["www", "google", "com"] == self.url.subdomains()
def test_setting_list_as_query_param(self):
    """Setting a list value stores every item under the same key."""
    url = URL().query_param("q", ["testing", "eggs"])
    # Compare against the stored values.  The previous form asserted a
    # non-empty list literal (always truthy) and used the real value only
    # as the failure message.
    assert ["testing", "eggs"] == url.query_param("q", as_list=True)
def test_url_can_be_used_as_key_in_dict(self):
    """A URL can be stored as a dict key and looked up again."""
    u = URL.from_string("http://google.com")
    # Keep the dict and read the value back; previously the literal was a
    # discarded expression statement, so only "does not raise" was checked.
    lookup = {u: 0}
    assert lookup[u] == 0
class TestQueryParamList:
    """Multi-valued query parameters: set, remove one value, append."""

    def test_set_list(self):
        base = URL("http://127.0.0.1/")
        url = base.query_param("q", ["something", "else"])
        values = url.query_param("q", as_list=True)
        # Compare against ``values``.  The previous ``assert [...], values``
        # form asserted a non-empty list literal and so always passed.
        assert ["something", "else"] == values

    def test_remove_item_from_list(self):
        base = URL("http://127.0.0.1/?q=a&q=b")
        url = base.remove_query_param("q", "a")
        values = url.query_param("q", as_list=True)
        assert ["b"] == values

    def test_append_to_existing_list(self):
        base = URL("http://127.0.0.1/?q=a&q=b")
        url = base.append_query_param("q", "c")
        values = url.query_param("q", as_list=True)
        assert ["a", "b", "c"] == values

    def test_append_to_nonexistant_list(self):
        # Appending under a key that is not yet in the query creates it.
        base = URL("http://127.0.0.1/?q=a&q=b")
        url = base.append_query_param("p", "c")
        values = url.query_param("p", as_list=True)
        assert ["c"] == values
def test_set_bytestring_query_param_key(self):
    """A UTF-8 byte-string key can be read back through its text form."""
    updated = self.base.query_param(self.bytes, "value")
    fetched = updated.query_param(self.text)
    assert "value" == fetched
class QuotedSlashesTests:
    """A "/" inside a single path segment is percent-encoded as ``%2F``.

    Both tests used to be named ``test_slashes_in_path``; the second
    definition replaced the first in the class namespace, so the
    ``add_path_segment`` case never ran.  Each now has its own name.

    NOTE(review): pytest's default ``python_classes`` pattern is ``Test*``.
    Unless pytest.ini overrides it, this class is not collected at all --
    confirm, and consider renaming it to ``TestQuotedSlashes``.
    """

    def test_slashes_in_added_path_segment(self):
        u = URL().add_path_segment("test/egg")
        assert u.as_string() == "/test%2Fegg"

    def test_slashes_in_replaced_path_segment(self):
        u = URL("/something").path_segment(0, "test/egg")
        assert u.as_string() == "/test%2Fegg"