├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .gitignore ├── .travis.yml ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── api-as-text.svg ├── api.svg ├── changelog.txt ├── furl ├── __init__.py ├── __version__.py ├── common.py ├── compat.py ├── furl.py └── omdict1D.py ├── logo-as-text.svg ├── logo.svg ├── setup.cfg ├── setup.py ├── tests ├── test_furl.py └── test_omdict1D.py └── tox.ini /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | build: 11 | runs-on: ubuntu-22.04 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | include: 16 | - python-version: '3.8' 17 | toxenv: py38 18 | - python-version: '3.9' 19 | toxenv: py39 20 | - python-version: '3.10' 21 | toxenv: py310 22 | - python-version: '3.11' 23 | toxenv: py311 24 | - python-version: '3.12' 25 | toxenv: py312 26 | - python-version: '3.13' 27 | toxenv: py313 28 | - python-version: 'pypy-3.10' 29 | toxenv: pypy3 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install tox 37 | run: pip install tox 38 | - name: Tox 39 | run: tox 40 | env: 41 | TOXENV: ${{ matrix.toxenv }} 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .#* 3 | \#* 4 | .tox 5 | dist/ 6 | .eggs/ 7 | build/ 8 | *.pyc 9 | *.pyo 10 | *.egg 11 | 
*.egg-info 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | matrix: 4 | include: 5 | - env: TOXENV=codestyle 6 | 7 | - python: 2.7 8 | env: TOXENV=py27 9 | - python: 3.6 10 | env: TOXENV=py36 11 | - python: 3.7 12 | env: TOXENV=py37 13 | - python: 3.8 14 | env: TOXENV=py38 15 | - python: 3.9 16 | env: TOXENV=py39 17 | - python: pypy 18 | env: TOXENV=pypy 19 | - python: pypy3 20 | env: TOXENV=pypy3 21 | 22 | install: travis_retry pip install tox 23 | script: tox 24 | 25 | notifications: 26 | email: false 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Build Amazing Things. 2 | 3 | *** 4 | 5 | ### Unlicense 6 | 7 | This is free and unencumbered software released into the public domain. 8 | 9 | Anyone is free to copy, modify, publish, use, compile, sell, or 10 | distribute this software, either in source code form or as a compiled 11 | binary, for any purpose, commercial or non-commercial, and by any 12 | means. 13 | 14 | In jurisdictions that recognize copyright laws, the author or authors 15 | of this software dedicate any and all copyright interest in the 16 | software to the public domain. We make this dedication for the benefit 17 | of the public at large and to the detriment of our heirs and 18 | successors. We intend this dedication to be an overt act of 19 | relinquishment in perpetuity of all present and future rights to this 20 | software under copyright law. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
25 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 26 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 27 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 28 | OTHER DEALINGS IN THE SOFTWARE. 29 | 30 | For more information, please refer to <http://unlicense.org/>. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.md README.md 2 | recursive-include tests/ *.py 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | furl 3 |

4 | 5 |

6 | 7 | 8 | 9 | 10 |

11 | 12 | ## furl is a small Python library that makes parsing and
manipulating URLs easy. 13 | 14 | Python's standard 15 | [urllib](https://docs.python.org/3/library/urllib.html) and 16 | [urlparse](https://docs.python.org/3/library/urllib.parse.html) modules 17 | provide a number of URL related functions, but using these functions to 18 | perform common URL operations proves tedious. Furl makes parsing and 19 | manipulating URLs easy. 20 | 21 | Furl is well tested, [Unlicensed](http://unlicense.org/) in the public 22 | domain, and supports Python 3 and PyPy3. 23 | 24 | 👥 Furl is looking for a lead contributor and maintainer. Would you love 25 | to lead furl, and make working with URLs a joy for everyone in Python? 26 | Please [reach out](mailto:grunseid+icecream@gmail.com) and let me know! 🙌 27 | 28 | Code time: Paths and query arguments are easy. Really easy. 29 | 30 | ```python 31 | >>> from furl import furl 32 | >>> f = furl('http://www.google.com/?one=1&two=2') 33 | >>> f /= 'path' 34 | >>> del f.args['one'] 35 | >>> f.args['three'] = '3' 36 | >>> f.url 37 | 'http://www.google.com/path?two=2&three=3' 38 | ``` 39 | 40 | Or use furl's inline modification methods. 41 | 42 | ```python 43 | >>> furl('http://www.google.com/?one=1').add({'two':'2'}).url 44 | 'http://www.google.com/?one=1&two=2' 45 | 46 | >>> furl('http://www.google.com/?one=1&two=2').set({'three':'3'}).url 47 | 'http://www.google.com/?three=3' 48 | 49 | >>> furl('http://www.google.com/?one=1&two=2').remove(['one']).url 50 | 'http://www.google.com/?two=2' 51 | ``` 52 | 53 | Encoding is handled for you. Unicode, too. 
54 | 55 | ```python 56 | >>> f = furl('http://www.google.com/') 57 | >>> f.path = 'some encoding here' 58 | >>> f.args['and some encoding'] = 'here, too' 59 | >>> f.url 60 | 'http://www.google.com/some%20encoding%20here?and+some+encoding=here,+too' 61 | >>> f.set(host=u'ドメイン.テスト', path=u'джк', query=u'☃=☺') 62 | >>> f.url 63 | 'http://xn--eckwd4c7c.xn--zckzah/%D0%B4%D0%B6%D0%BA?%E2%98%83=%E2%98%BA' 64 | ``` 65 | 66 | Fragments also have a path and a query. 67 | 68 | ```python 69 | >>> f = furl('http://www.google.com/') 70 | >>> f.fragment.path.segments = ['two', 'directories'] 71 | >>> f.fragment.args = {'one': 'argument'} 72 | >>> f.url 73 | 'http://www.google.com/#two/directories?one=argument' 74 | ``` 75 | 76 | ## Installation 77 | 78 | Installing furl with pip is easy. 79 | 80 | ``` 81 | $ pip install furl 82 | ``` 83 | 84 | 85 | ## API 86 | 87 | * [Basics](#basics) 88 | * [Scheme, Username, Password, Host, Port, Network Location, and Origin](#scheme-username-password-host-port-network-location-and-origin) 89 | * [Path](#path) 90 | * [Manipulation](#manipulation) 91 | * [Query](#query) 92 | * [Manipulation](#manipulation-1) 93 | * [Parameters](#parameters) 94 | * [Fragment](#fragment) 95 | * [Encoding](#encoding) 96 | * [Inline manipulation](#inline-manipulation) 97 | * [Miscellaneous](#miscellaneous) 98 | 99 | 100 | ### Basics 101 | 102 | furl objects let you access and modify the various components of a URL. 103 | 104 | ``` 105 | scheme://username:password@host:port/path?query#fragment 106 | ``` 107 | 108 | * __scheme__ is the scheme string (all lowercase) or None. None means no 109 | scheme. An empty string means a protocol relative URL, like 110 | `//www.google.com`. 111 | * __username__ is the username string for authentication. 112 | * __password__ is the password string for authentication with __username__. 113 | * __host__ is the domain name, IPv4, or IPv6 address as a string. Domain names 114 | are all lowercase. 115 | * __port__ is an integer or None. 
A value of None means no port specified and 116 | the default port for the given __scheme__ should be inferred, if possible 117 | (e.g. port 80 for the scheme `http`). 118 | * __path__ is a Path object comprised of path segments. 119 | * __query__ is a Query object comprised of key:value query arguments. 120 | * __fragment__ is a Fragment object comprised of a Path object and Query object 121 | separated by an optional `?` separator. 122 | 123 | 124 | 125 | ### Scheme, Username, Password, Host, Port, Network Location, and Origin 126 | 127 | __scheme__, __username__, __password__, and __host__ are strings or 128 | None. __port__ is an integer or None. 129 | 130 | ```python 131 | >>> f = furl('http://user:pass@www.google.com:99/') 132 | >>> f.scheme, f.username, f.password, f.host, f.port 133 | ('http', 'user', 'pass', 'www.google.com', 99) 134 | ``` 135 | 136 | furl infers the default port for common schemes. 137 | 138 | ```python 139 | >>> f = furl('https://secure.google.com/') 140 | >>> f.port 141 | 443 142 | 143 | >>> f = furl('unknown://www.google.com/') 144 | >>> print(f.port) 145 | None 146 | ``` 147 | 148 | __netloc__ is the string combination of __username__, __password__, __host__, 149 | and __port__, not including __port__ if it's None or the default port for the 150 | provided __scheme__. 151 | 152 | ```python 153 | >>> furl('http://www.google.com/').netloc 154 | 'www.google.com' 155 | 156 | >>> furl('http://www.google.com:99/').netloc 157 | 'www.google.com:99' 158 | 159 | >>> furl('http://user:pass@www.google.com:99/').netloc 160 | 'user:pass@www.google.com:99' 161 | ``` 162 | 163 | __origin__ is the string combination of __scheme__, __host__, and __port__, not 164 | including __port__ if it's None or the default port for the provided __scheme__. 
165 | 166 | ```python 167 | >>> furl('http://www.google.com/').origin 168 | 'http://www.google.com' 169 | 170 | >>> furl('http://www.google.com:99/').origin 171 | 'http://www.google.com:99' 172 | ``` 173 | 174 | 175 | 176 | ### Path 177 | 178 | URL paths in furl are Path objects that have __segments__, a list of zero or 179 | more path segments that can be manipulated directly. Path segments in 180 | __segments__ are percent-decoded and all interaction with __segments__ should 181 | take place with percent-decoded strings. 182 | 183 | ```python 184 | >>> f = furl('http://www.google.com/a/large%20ish/path') 185 | >>> f.path 186 | Path('/a/large ish/path') 187 | >>> f.path.segments 188 | ['a', 'large ish', 'path'] 189 | >>> str(f.path) 190 | '/a/large%20ish/path' 191 | ``` 192 | 193 | #### Manipulation 194 | 195 | ```python 196 | >>> f.path.segments = ['a', 'new', 'path', ''] 197 | >>> str(f.path) 198 | '/a/new/path/' 199 | 200 | >>> f.path = 'o/hi/there/with%20some%20encoding/' 201 | >>> f.path.segments 202 | ['o', 'hi', 'there', 'with some encoding', ''] 203 | >>> str(f.path) 204 | '/o/hi/there/with%20some%20encoding/' 205 | 206 | >>> f.url 207 | 'http://www.google.com/o/hi/there/with%20some%20encoding/' 208 | 209 | >>> f.path.segments = ['segments', 'are', 'maintained', 'decoded', '^`<>[]"#/?'] 210 | >>> str(f.path) 211 | '/segments/are/maintained/decoded/%5E%60%3C%3E%5B%5D%22%23%2F%3F' 212 | ``` 213 | 214 | A path that starts with `/` is considered absolute, and a Path can be absolute 215 | or not as specified (or set) by the boolean attribute __isabsolute__. URL Paths 216 | have a special restriction: they must be absolute if a __netloc__ (username, 217 | password, host, and/or port) is present. This restriction exists because a URL 218 | path must start with `/` to separate itself from the __netloc__, if 219 | present. Fragment Paths have no such limitation and __isabsolute__ can be 220 | True or False without restriction. 
221 | 222 | Here's a URL Path example that illustrates how __isabsolute__ becomes True and 223 | read-only in the presence of a __netloc__. 224 | 225 | ```python 226 | >>> f = furl('/url/path') 227 | >>> f.path.isabsolute 228 | True 229 | >>> f.path.isabsolute = False 230 | >>> f.url 231 | 'url/path' 232 | >>> f.host = 'blaps.ru' 233 | >>> f.url 234 | 'blaps.ru/url/path' 235 | >>> f.path.isabsolute 236 | True 237 | >>> f.path.isabsolute = False 238 | Traceback (most recent call last): 239 | ... 240 | AttributeError: Path.isabsolute is True and read-only for URLs with a netloc (a username, password, host, and/or port). URL paths must be absolute if a netloc exists. 241 | >>> f.url 242 | 'blaps.ru/url/path' 243 | ``` 244 | 245 | Conversely, the __isabsolute__ attribute of Fragment Paths isn't bound by the 246 | same read-only restriction. URL fragments are always prefixed by a `#` character 247 | and don't need to be separated from the __netloc__. 248 | 249 | ```python 250 | >>> f = furl('http://www.google.com/#/absolute/fragment/path/') 251 | >>> f.fragment.path.isabsolute 252 | True 253 | >>> f.fragment.path.isabsolute = False 254 | >>> f.url 255 | 'http://www.google.com/#absolute/fragment/path/' 256 | >>> f.fragment.path.isabsolute = True 257 | >>> f.url 258 | 'http://www.google.com/#/absolute/fragment/path/' 259 | ``` 260 | 261 | A path that ends with `/` is considered a directory, and otherwise considered a 262 | file. The Path attribute __isdir__ returns True if the path is a directory, 263 | False otherwise. Conversely, the attribute __isfile__ returns True if the path 264 | is a file, False otherwise. 
265 | 266 | ```python 267 | >>> f = furl('http://www.google.com/a/directory/') 268 | >>> f.path.isdir 269 | True 270 | >>> f.path.isfile 271 | False 272 | 273 | >>> f = furl('http://www.google.com/a/file') 274 | >>> f.path.isdir 275 | False 276 | >>> f.path.isfile 277 | True 278 | ``` 279 | 280 | A path can be normalized with __normalize()__, and __normalize()__ returns the 281 | Path object for method chaining. 282 | 283 | ```python 284 | >>> f = furl('http://www.google.com////a/./b/lolsup/../c/') 285 | >>> f.path.normalize() 286 | >>> f.url 287 | 'http://www.google.com/a/b/c/' 288 | ``` 289 | 290 | Path segments can also be appended with the slash operator, like with 291 | [pathlib.Path](https://docs.python.org/3/library/pathlib.html#operators). 292 | 293 | ```python 294 | >>> from __future__ import division # For Python 2.x. 295 | >>> 296 | >>> f = furl('path') 297 | >>> f.path /= 'with' 298 | >>> f.path = f.path / 'more' / 'path segments/' 299 | >>> f.url 300 | '/path/with/more/path%20segments/' 301 | ``` 302 | 303 | For a dictionary representation of a path, use __asdict()__. 304 | 305 | ```python 306 | >>> f = furl('http://www.google.com/some/enc%20oding') 307 | >>> f.path.asdict() 308 | { 'encoded': '/some/enc%20oding', 309 | 'isabsolute': True, 310 | 'isdir': False, 311 | 'isfile': True, 312 | 'segments': ['some', 'enc oding'] } 313 | ``` 314 | 315 | 316 | 317 | ### Query 318 | 319 | URL queries in furl are Query objects that have __params__, a one dimensional 320 | [ordered multivalue dictionary](https://github.com/gruns/orderedmultidict) of 321 | query keys and values. Query keys and values in __params__ are percent-decoded 322 | and all interaction with __params__ should take place with percent-decoded 323 | strings. 
324 | 325 | ```python 326 | >>> f = furl('http://www.google.com/?one=1&two=2') 327 | >>> f.query 328 | Query('one=1&two=2') 329 | >>> f.query.params 330 | omdict1D([('one', '1'), ('two', '2')]) 331 | >>> str(f.query) 332 | 'one=1&two=2' 333 | ``` 334 | 335 | furl objects and Fragment objects (covered below) contain a Query object, and 336 | __args__ is provided as a shortcut on these objects to access __query.params__. 337 | 338 | ```python 339 | >>> f = furl('http://www.google.com/?one=1&two=2') 340 | >>> f.query.params 341 | omdict1D([('one', '1'), ('two', '2')]) 342 | >>> f.args 343 | omdict1D([('one', '1'), ('two', '2')]) 344 | >>> f.args is f.query.params 345 | True 346 | ``` 347 | 348 | #### Manipulation 349 | 350 | __params__ is a one dimensional 351 | [ordered multivalue dictionary](https://github.com/gruns/orderedmultidict) that 352 | maintains method parity with Python's standard dictionary. 353 | 354 | ```python 355 | >>> f.query = 'silicon=14&iron=26&inexorable%20progress=vae%20victus' 356 | >>> f.query.params 357 | omdict1D([('silicon', '14'), ('iron', '26'), ('inexorable progress', 'vae victus')]) 358 | >>> del f.args['inexorable progress'] 359 | >>> f.args['magnesium'] = '12' 360 | >>> f.args 361 | omdict1D([('silicon', '14'), ('iron', '26'), ('magnesium', '12')]) 362 | ``` 363 | 364 | __params__ can also store multiple values for the same key because it's a 365 | multivalue dictionary. 366 | 367 | ```python 368 | >>> f = furl('http://www.google.com/?space=jams&space=slams') 369 | >>> f.args['space'] 370 | 'jams' 371 | >>> f.args.getlist('space') 372 | ['jams', 'slams'] 373 | >>> f.args.addlist('repeated', ['1', '2', '3']) 374 | >>> str(f.query) 375 | 'space=jams&space=slams&repeated=1&repeated=2&repeated=3' 376 | >>> f.args.popvalue('space') 377 | 'slams' 378 | >>> f.args.popvalue('repeated', '2') 379 | '2' 380 | >>> str(f.query) 381 | 'space=jams&repeated=1&repeated=3' 382 | ``` 383 | 384 | __params__ is one dimensional. 
If a list of values is provided as a query value, 385 | that list is interpreted as multiple values. 386 | 387 | ```python 388 | >>> f = furl() 389 | >>> f.args['repeated'] = ['1', '2', '3'] 390 | >>> f.add(args={'space':['jams', 'slams']}) 391 | >>> str(f.query) 392 | 'repeated=1&repeated=2&repeated=3&space=jams&space=slams' 393 | ``` 394 | 395 | This makes sense: URL queries are inherently one dimensional -- query values 396 | can't have native subvalues. 397 | 398 | See the [orderedmultidict](https://github.com/gruns/orderedmultidict) 399 | documentation for more information on interacting with the ordered multivalue 400 | dictionary __params__. 401 | 402 | #### Parameters 403 | 404 | To produce an empty query argument, like `http://sprop.su/?param=`, set the 405 | argument's value to the empty string. 406 | 407 | ```python 408 | >>> f = furl('http://sprop.su') 409 | >>> f.args['param'] = '' 410 | >>> f.url 411 | 'http://sprop.su/?param=' 412 | ``` 413 | 414 | To produce an empty query argument without a trailing `=`, use `None` as the 415 | parameter value. 416 | 417 | ```python 418 | >>> f = furl('http://sprop.su') 419 | >>> f.args['param'] = None 420 | >>> f.url 421 | 'http://sprop.su/?param' 422 | ``` 423 | 424 | __encode(delimiter='&', quote_plus=True, dont_quote='')__ can be used to encode 425 | query strings with delimiters like `;`, encode spaces as `+` instead of `%20` 426 | (i.e. application/x-www-form-urlencoded encoded), or avoid percent-encoding 427 | valid query characters entirely (valid query characters are 428 | `/?:@-._~!$&'()*+,;=`). 429 | 430 | ```python 431 | >>> f.query = 'space=jams&woofs=squeeze+dog' 432 | >>> f.query.encode() 433 | 'space=jams&woofs=squeeze+dog' 434 | >>> f.query.encode(';') 435 | 'space=jams;woofs=squeeze+dog' 436 | >>> f.query.encode(quote_plus=False) 437 | 'space=jams&woofs=squeeze%20dog' 438 | ``` 439 | 440 | `dont_quote` accepts `True`, `False`, or a string of valid query characters to 441 | not percent-encode. 
If `True`, all valid query characters `/?:@-._~!$&'()*+,;=` 442 | aren't percent-encoded. 443 | 444 | ```python 445 | >>> f.query = 'one,two/three' 446 | >>> f.query.encode() 447 | 'one%2Ctwo%2Fthree' 448 | >>> f.query.encode(dont_quote=True) 449 | 'one,two/three' 450 | >>> f.query.encode(dont_quote=',') 451 | 'one,two%2Fthree' 452 | ``` 453 | 454 | For a dictionary representation of a query, use __asdict()__. 455 | 456 | ```python 457 | >>> f = furl('http://www.google.com/?space=ja+ms&space=slams') 458 | >>> f.query.asdict() 459 | { 'encoded': 'space=ja+ms&space=slams', 460 | 'params': [('space', 'ja ms'), 461 | ('space', 'slams')] } 462 | ``` 463 | 464 | 465 | 466 | ### Fragment 467 | 468 | URL fragments in furl are Fragment objects that have a Path __path__ and Query 469 | __query__ separated by an optional `?` __separator__. 470 | 471 | ```python 472 | >>> f = furl('http://www.google.com/#/fragment/path?with=params') 473 | >>> f.fragment 474 | Fragment('/fragment/path?with=params') 475 | >>> f.fragment.path 476 | Path('/fragment/path') 477 | >>> f.fragment.query 478 | Query('with=params') 479 | >>> f.fragment.separator 480 | True 481 | ``` 482 | 483 | Manipulation of Fragments is done via the Fragment's Path and Query instances, 484 | __path__ and __query__. 485 | 486 | ```python 487 | >>> f = furl('http://www.google.com/#/fragment/path?with=params') 488 | >>> str(f.fragment) 489 | '/fragment/path?with=params' 490 | >>> f.fragment.path.segments.append('file.ext') 491 | >>> str(f.fragment) 492 | '/fragment/path/file.ext?with=params' 493 | 494 | >>> f = furl('http://www.google.com/#/fragment/path?with=params') 495 | >>> str(f.fragment) 496 | '/fragment/path?with=params' 497 | >>> f.fragment.args['new'] = 'yep' 498 | >>> str(f.fragment) 499 | '/fragment/path?with=params&new=yep' 500 | ``` 501 | 502 | Creating hash-bang fragments with furl illustrates the use of Fragment's boolean 503 | attribute __separator__. 
When __separator__ is False, the `?` that separates 504 | __path__ and __query__ isn't included. 505 | 506 | ```python 507 | >>> f = furl('http://www.google.com/') 508 | >>> f.fragment.path = '!' 509 | >>> f.fragment.args = {'a':'dict', 'of':'args'} 510 | >>> f.fragment.separator 511 | True 512 | >>> str(f.fragment) 513 | '!?a=dict&of=args' 514 | 515 | >>> f.fragment.separator = False 516 | >>> str(f.fragment) 517 | '!a=dict&of=args' 518 | >>> f.url 519 | 'http://www.google.com/#!a=dict&of=args' 520 | ``` 521 | 522 | For a dictionary representation of a fragment, use __asdict()__. 523 | 524 | ```python 525 | >>> f = furl('http://www.google.com/#path?args=args') 526 | >>> f.fragment.asdict() 527 | { 'encoded': 'path?args=args', 528 | 'separator': True, 529 | 'path': { 'encoded': 'path', 530 | 'isabsolute': False, 531 | 'isdir': False, 532 | 'isfile': True, 533 | 'segments': ['path']}, 534 | 'query': { 'encoded': 'args=args', 535 | 'params': [('args', 'args')]} } 536 | ``` 537 | 538 | 539 | 540 | ### Encoding 541 | 542 | Furl handles encoding for you, and furl's philosophy on encoding is simple: raw 543 | URL strings should always be percent-encoded. 544 | 545 | ```python 546 | >>> f = furl() 547 | >>> f.netloc = '%40user:%3Apass@google.com' 548 | >>> f.username, f.password 549 | ('@user', ':pass') 550 | 551 | >>> f = furl() 552 | >>> f.path = 'supply%20percent%20encoded/path%20strings' 553 | >>> f.path.segments 554 | ['supply percent encoded', 'path strings'] 555 | 556 | >>> f.set(query='supply+percent+encoded=query+strings,+too') 557 | >>> f.query.params 558 | omdict1D([('supply percent encoded', 'query strings, too')]) 559 | 560 | >>> f.set(fragment='percent%20encoded%20path?and+percent+encoded=query+too') 561 | >>> f.fragment.path.segments 562 | ['percent encoded path'] 563 | >>> f.fragment.args 564 | omdict1D([('and percent encoded', 'query too')]) 565 | ``` 566 | 567 | Raw, non-URL strings should never be percent-encoded. 
568 | 569 | ```python 570 | >>> f = furl('http://google.com') 571 | >>> f.set(username='@prap', password=':porps') 572 | >>> f.url 573 | 'http://%40prap:%3Aporps@google.com' 574 | 575 | >>> f = furl() 576 | >>> f.set(path=['path segments are', 'decoded', '<>[]"#']) 577 | >>> str(f.path) 578 | '/path%20segments%20are/decoded/%3C%3E%5B%5D%22%23' 579 | 580 | >>> f.set(args={'query parameters':'and values', 'are':'decoded, too'}) 581 | >>> str(f.query) 582 | 'query+parameters=and+values&are=decoded,+too' 583 | 584 | >>> f.fragment.path.segments = ['decoded', 'path segments'] 585 | >>> f.fragment.args = {'and decoded':'query parameters and values'} 586 | >>> str(f.fragment) 587 | 'decoded/path%20segments?and+decoded=query+parameters+and+values' 588 | ``` 589 | 590 | Python's 591 | [urllib.quote()](http://docs.python.org/library/urllib.html#urllib.quote) and 592 | [urllib.unquote()](http://docs.python.org/library/urllib.html#urllib.unquote) 593 | can be used to percent-encode and percent-decode path strings. Similarly, 594 | [urllib.quote_plus()](http://docs.python.org/library/urllib.html#urllib.quote_plus) 595 | and 596 | [urllib.unquote_plus()](http://docs.python.org/library/urllib.html#urllib.unquote_plus) 597 | can be used to percent-encode and percent-decode query strings. 598 | 599 | 600 | 601 | ### Inline manipulation 602 | 603 | For quick, single-line URL manipulation, the __add()__, __set()__, and 604 | __remove()__ methods of furl objects manipulate various URL components and 605 | return the furl object for method chaining. 606 | 607 | ```python 608 | >>> url = 'http://www.google.com/#fragment' 609 | >>> furl(url).add(args={'example':'arg'}).set(port=99).remove(fragment=True).url 610 | 'http://www.google.com:99/?example=arg' 611 | ``` 612 | 613 | __add()__ adds items to a furl object with the optional arguments 614 | 615 | * __args__: Shortcut for __query_params__. 
616 | * __path__: A list of path segments to add to the existing path segments, or a 617 | path string to join with the existing path string. 618 | * __query_params__: A dictionary of query keys and values to add to the query. 619 | * __fragment_path__: A list of path segments to add to the existing fragment 620 | path segments, or a path string to join with the existing fragment path 621 | string. 622 | * __fragment_args__: A dictionary of query keys and values to add to the 623 | fragment's query. 624 | 625 | ```python 626 | >>> f = furl('http://www.google.com/').add( 627 | ... path='/search', fragment_path='frag/path', fragment_args={'frag':'arg'}) 628 | >>> f.url 629 | 'http://www.google.com/search#frag/path?frag=arg' 630 | ``` 631 | 632 | __set()__ sets items of a furl object with the optional arguments 633 | 634 | * __args__: Shortcut for __query_params__. 635 | * __path__: List of path segments or a path string to adopt. 636 | * __scheme__: Scheme string to adopt. 637 | * __netloc__: Network location string to adopt. 638 | * __origin__: Origin string to adopt. 639 | * __query__: Query string to adopt. 640 | * __query_params__: A dictionary of query keys and values to adopt. 641 | * __fragment__: Fragment string to adopt. 642 | * __fragment_path__: A list of path segments to adopt for the fragment's path 643 | or a path string to adopt as the fragment's path. 644 | * __fragment_args__: A dictionary of query keys and values for the fragment's 645 | query to adopt. 646 | * __fragment_separator__: Boolean whether or not there should be a `?` 647 | separator between the fragment path and the fragment query. 648 | * __host__: Host string to adopt. 649 | * __port__: Port number to adopt. 650 | * __username__: Username string to adopt. 651 | * __password__: Password string to adopt. 652 | 653 | 654 | ```python 655 | >>> f = furl().set( 656 | ... scheme='https', host='secure.google.com', port=99, path='index.html', 657 | ... 
args={'some':'args'}, fragment='great job') 658 | >>> f.url 659 | 'https://secure.google.com:99/index.html?some=args#great%20job' 660 | ``` 661 | 662 | __remove()__ removes items from a furl object with the optional arguments 663 | 664 | * __args__: Shortcut for __query_params__. 665 | * __path__: A list of path segments to remove from the end of the existing path 666 | segments list, or a path string to remove from the end of the existing 667 | path string, or True to remove the entire path portion of the URL. 668 | * __query__: A list of query keys to remove from the query, if they exist, or 669 | True to remove the entire query portion of the URL. 670 | * __query_params__: A list of query keys to remove from the query, if they 671 | exist. 672 | * __fragment__: If True, remove the entire fragment portion of the URL. 673 | * __fragment_path__: A list of path segments to remove from the end of the 674 | fragment's path segments, or a path string to remove from the end of the 675 | fragment's path string, or True to remove the entire fragment path. 676 | * __fragment_args__: A list of query keys to remove from the fragment's query, 677 | if they exist. 678 | * __username__: If True, remove the username, if it exists. 679 | * __password__: If True, remove the password, if it exists. 680 | 681 | 682 | ```python 683 | >>> url = 'https://secure.google.com:99/a/path/?some=args#great job' 684 | >>> furl(url).remove(args=['some'], path='path/', fragment=True, port=True).url 685 | 'https://secure.google.com/a/' 686 | ``` 687 | 688 | 689 | 690 | ### Miscellaneous 691 | 692 | Like [pathlib.Path](https://docs.python.org/3/library/pathlib.html#operators), 693 | path segments can be appended to a furl object's Path with the slash operator. 694 | 695 | ```python 696 | >>> from __future__ import division # For Python 2.x. 
697 | >>> f = furl('http://www.google.com/path?example=arg#frag') 698 | >>> f /= 'add' 699 | >>> f = f / 'seg ments/' 700 | >>> f.url 701 | 'http://www.google.com/path/add/seg%20ments/?example=arg#frag' 702 | ``` 703 | 704 | __tostr(query_delimiter='&', query_quote_plus=True, query_dont_quote='')__ 705 | creates and returns a URL string. `query_delimiter`, `query_quote_plus`, and 706 | `query_dont_quote` are passed unmodified to `Query.encode()` as `delimiter`, 707 | `quote_plus`, and `dont_quote` respectively. 708 | 709 | ```python 710 | >>> f = furl('http://spep.ru/?a+b=c+d&two%20tap=cat%20nap%24') 711 | >>> f.tostr() 712 | 'http://spep.ru/?a+b=c+d&two+tap=cat+nap$' 713 | >>> f.tostr(query_delimiter=';', query_quote_plus=False) 714 | 'http://spep.ru/?a%20b=c%20d;two%20tap=cat%20nap$' 715 | >>> f.tostr(query_dont_quote='$') 716 | 'http://spep.ru/?a+b=c+d&two+tap=cat+nap$' 717 | ``` 718 | 719 | `furl.url` is a shortcut for `furl.tostr()`. 720 | 721 | ```python 722 | >>> f.url 723 | 'http://spep.ru/?a+b=c+d&two+tap=cat+nap$' 724 | >>> f.url == f.tostr() == str(f) 725 | True 726 | ``` 727 | 728 | __copy()__ creates and returns a new furl object with an identical URL. 729 | 730 | ```python 731 | >>> f = furl('http://www.google.com') 732 | >>> f.copy().set(path='/new/path').url 733 | 'http://www.google.com/new/path' 734 | >>> f.url 735 | 'http://www.google.com' 736 | ``` 737 | 738 | __join()__ joins the furl object's URL with the provided relative or absolute 739 | URL and returns the furl object for method chaining. __join()__'s action is the 740 | same as navigating to the provided URL from the current URL in a web browser. 
741 | 742 | ```python 743 | >>> f = furl('http://www.google.com') 744 | >>> f.join('new/path').url 745 | 'http://www.google.com/new/path' 746 | >>> f.join('replaced').url 747 | 'http://www.google.com/new/replaced' 748 | >>> f.join('../parent').url 749 | 'http://www.google.com/parent' 750 | >>> f.join('path?query=yes#fragment').url 751 | 'http://www.google.com/path?query=yes#fragment' 752 | >>> f.join('unknown://www.yahoo.com/new/url/').url 753 | 'unknown://www.yahoo.com/new/url/' 754 | ``` 755 | 756 | For a dictionary representation of a URL, use __asdict()__. 757 | 758 | ```python 759 | >>> f = furl('https://xn--eckwd4c7c.xn--zckzah/path?args=args#frag') 760 | >>> f.asdict() 761 | { 'url': 'https://xn--eckwd4c7c.xn--zckzah/path?args=args#frag', 762 | 'scheme': 'https', 763 | 'username': None, 764 | 'password': None, 765 | 'host': 'ドメイン.テスト', 766 | 'host_encoded': 'xn--eckwd4c7c.xn--zckzah', 767 | 'port': 443, 768 | 'netloc': 'xn--eckwd4c7c.xn--zckzah', 769 | 'origin': 'https://xn--eckwd4c7c.xn--zckzah', 770 | 'path': { 'encoded': '/path', 771 | 'isabsolute': True, 772 | 'isdir': False, 773 | 'isfile': True, 774 | 'segments': ['path']}, 775 | 'query': { 'encoded': 'args=args', 776 | 'params': [('args', 'args')]}, 777 | 'fragment': { 'encoded': 'frag', 778 | 'path': { 'encoded': 'frag', 779 | 'isabsolute': False, 780 | 'isdir': False, 781 | 'isfile': True, 782 | 'segments': ['frag']}, 783 | 'query': { 'encoded': '', 784 | 'params': []}, 785 | 'separator': True} } 786 | ``` 787 | -------------------------------------------------------------------------------- /api-as-text.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 20 | 42 | 44 | 45 | 47 | image/svg+xml 48 | 50 | 51 | 52 | 53 | 54 | 59 | f://url API 79 | 80 | 81 | -------------------------------------------------------------------------------- /api.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 
7 | image/svg+xml 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /changelog.txt: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | v2.1.4 3 | ================================================================================ 4 | Changed: Drop support for all Python versions prior to Python 3.8, which 5 | are now long past EOL. Notably: Python 2 is no longer supported. 6 | 7 | 8 | ================================================================================ 9 | v2.1.3 10 | ================================================================================ 11 | Fixed: Actually drop ';' as a query delimiter. See furl v2.1.2's 12 | changelog and https://bugs.python.org/issue42967. 13 | 14 | 15 | ================================================================================ 16 | v2.1.2 17 | ================================================================================ 18 | Fixed: Support Python 3.9's changed urllib.parse.urljoin() behavior. 19 | < py3.9: furl('wss://slrp.com/').join('foo:1') -> 'wss://slrp.com/foo:1' 20 | >= py3.9: furl('wss://slrp.com/').join('foo:1') -> 'foo:1' 21 | Changed: Drop semicolon query delimiters. See 22 | https://bugs.python.org/issue42967. 23 | Changed: Drop support for EOL Python 3.4 and Python 3.5. 24 | 25 | 26 | ================================================================================ 27 | v2.1.1 28 | ================================================================================ 29 | Fixed: Export metadata variables (furl.__title__, furl.__version__, etc). 30 | Added: scheme, host, netloc, and origin as parameters to furl.remove(). 31 | Changed: Homogenize parameter order across furl.add(), furl.set(), and 32 | furl.remove(). 33 | Changed: furl.origin can be assigned None. 
This has the same behavior as 34 | furl.remove(origin=True). 35 | 36 | 37 | ================================================================================ 38 | v2.1.0 39 | ================================================================================ 40 | Added: A dont_quote= parameter to Query.encode() and a query_dont_quote= 41 | parameter to furl.tostr() that exempt valid query characters from being 42 | percent-encoded, either in their entirety with dont_quote=True, or selectively 43 | with dont_quote=<string>, like dont_quote='/?@_'. 44 | Changed: Move package info from __init__.py into the more standard 45 | __version__.py. 46 | Fixed: Support Unicode usernames and passwords in Python 2. 47 | Fixed: Update orderedmultidict to v1.0.1 to resolve a DeprecationWarning. 48 | Fixed: Encode '/' consistently in query strings across both quote_plus=True and 49 | quote_plus=False. 50 | 51 | 52 | ================================================================================ 53 | v2.0.0 54 | ================================================================================ 55 | Added: All URL components (scheme, host, path, etc) to furl()'s constructor as 56 | keyword arguments. E.g. f = furl(scheme='http', host='host', path='/lolsup'). 57 | Changed: furl.__truediv__() and Path.__truediv__() now mirror 58 | Pathlib.__truediv__()'s behavior and return a new instance. The original 59 | instance is no longer modified. Old behavior: f = furl('1'); f / '2' -> str(f) 60 | == '1'. New behavior: f = furl('1'); f /= '2' -> str(f) == '1/2'. 61 | Fixed: Path.load() now accepts Path instances, e.g. f.path.load(Path('hi')). 62 | Removed: Support for Python 2.6, which reached EOL on 2013-10-29. 63 | 64 | 65 | ================================================================================ 66 | v1.2.1 67 | ================================================================================ 68 | Fixed: Join URLs without an authority (e.g.
'foo:blah') consistently with 69 | urllib.parse.urljoin(). 70 | 71 | 72 | ================================================================================ 73 | v1.2 74 | ================================================================================ 75 | Added: Path segment appending via the division operator (__truediv__()). 76 | Changed: Bump orderedmultidict dependency to v1.0. 77 | Changed: Check code style with flake8 instead of pycodestyle. 78 | Changed: Percent-encode all non-unreserved characters in Query key=value pairs, 79 | including valid query characters (e.g. '=', '?', etc). Old encoding: 80 | "?url=http://foo.com/"; new encoding: "?url=http%3A%2F%2Ffoo.com%2F". Equal 81 | signs remain decoded in query values where the key is empty to allow for, and 82 | preserve, queries like '?==3=='. 83 | 84 | 85 | ================================================================================ 86 | v1.1 87 | ================================================================================ 88 | Fixed: Support and preserve all query strings as provided. For example, preserve 89 | the query '&&==' of 'http://foo.com?&&==' as-is. Empty key=value pairs are 90 | stored as ('', None) in Query.params, e.g. [('', None), ('', None)] for the 91 | query '&'. 92 | Changed: Don't encode equal signs ('=') in query values if the key is empty. 93 | That is, allow and preserve queries like '?==3==' while also percent encoding 94 | equal signs in query values with an associated key, as expected. E.g. 95 | '?a=1%3D1'. 96 | 97 | 98 | ================================================================================ 99 | v1.0.2 100 | ================================================================================ 101 | Added: strip_scheme() public function. 102 | Changed: Make get_scheme() and set_scheme() functions public. 103 | Added: Support all schemes without a netloc/authority, like 104 | 'mailto:hi@email.com', without an explicit whitelist of such schemes 105 | (e.g.
tel:, sms:, mailto:, etc). 106 | Fixed: Restore furl.url's setter method. E.g. furl.url = 'http://www.foo.com/'. 107 | Removed: Support for Python 3.3, which reached EOL on 2017-09-29. 108 | 109 | 110 | ================================================================================ 111 | v1.0.1 112 | ================================================================================ 113 | Added: Add dictionary representations of Path, Query, Fragment, and furl objects 114 | via an asdict() method. 115 | 116 | 117 | ================================================================================ 118 | v1.0.0 119 | ================================================================================ 120 | Added: Test against Python 3.6. 121 | Changed: Bumped the version number to v1.0 to signify that furl is a mature and 122 | stable library. Furl has been marked Production/Stable in setup.py for a long 123 | time anyhow -- it's high time for the version number to catch up. 124 | 125 | 126 | ================================================================================ 127 | v0.5.7 128 | ================================================================================ 129 | Fixed: Only percent-decode percent-encoded path strings once, not twice. 130 | 131 | 132 | ================================================================================ 133 | v0.5.6 134 | ================================================================================ 135 | Changed: Bumped the orderedmultidict dependency from v0.7.7 to v0.7.8. The 136 | latter, v0.7.8, fixes a splat (i.e. **omdict) bug. 137 | 138 | 139 | ================================================================================ 140 | v0.5.5 141 | ================================================================================ 142 | Changed: Bumped the orderedmultidict dependency from v0.7.6 to v0.7.7. The 143 | latter, v0.7.7, better interoperates with other popular libraries, like 144 | Requests. 
145 | 146 | 147 | ================================================================================ 148 | v0.5.4 149 | ================================================================================ 150 | Fixed: When provided to constructors and load() methods, treat None as the empty 151 | string, not the string 'None'. E.g. furl(None) == furl(''), not furl('None'). 152 | 153 | 154 | ================================================================================ 155 | v0.5.3 156 | ================================================================================ 157 | Fixed: In Python 2, furl.netloc and furl.origin return strings, not Unicode 158 | strings. 159 | 160 | 161 | ================================================================================ 162 | v0.5.2 163 | ================================================================================ 164 | Added: Test PEP8 compliance with tox. 165 | Fixed: Verify valid schemes before adoption. 166 | 167 | 168 | ================================================================================ 169 | v0.5.1 170 | ================================================================================ 171 | Added: Origin support (e.g. http://google.com of http://u:p@google.com/). 172 | 173 | 174 | This changelog wasn't maintained prior to v0.5. 175 | -------------------------------------------------------------------------------- /furl/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 4 | # furl - URL manipulation made simple. 5 | # 6 | # Ansgar Grunseid 7 | # grunseid.com 8 | # grunseid@gmail.com 9 | # 10 | # License: Build Amazing Things (Unlicense) 11 | # 12 | 13 | from .furl import * # noqa 14 | 15 | # Import all variables in __version__.py without explicit imports. 16 | from . 
import __version__ 17 | globals().update(dict((k, v) for k, v in __version__.__dict__.items())) 18 | -------------------------------------------------------------------------------- /furl/__version__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 4 | # furl - URL manipulation made simple. 5 | # 6 | # Ansgar Grunseid 7 | # grunseid.com 8 | # grunseid@gmail.com 9 | # 10 | # License: Build Amazing Things (Unlicense) 11 | # 12 | 13 | __title__ = 'furl' 14 | __version__ = '2.1.4' 15 | __license__ = 'Unlicense' 16 | __author__ = 'Ansgar Grunseid' 17 | __contact__ = 'grunseid@gmail.com' 18 | __url__ = 'https://github.com/gruns/furl' 19 | __copyright__ = 'Copyright Ansgar Grunseid' 20 | __description__ = 'URL manipulation made simple.' 21 | -------------------------------------------------------------------------------- /furl/common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 4 | # furl - URL manipulation made simple. 5 | # 6 | # Ansgar Grunseid 7 | # grunseid.com 8 | # grunseid@gmail.com 9 | # 10 | # License: Build Amazing Things (Unlicense) 11 | # 12 | 13 | from .compat import string_types 14 | 15 | 16 | absent = object() 17 | 18 | 19 | def callable_attr(obj, attr): 20 | return hasattr(obj, attr) and callable(getattr(obj, attr)) 21 | 22 | 23 | def is_iterable_but_not_string(v): 24 | return callable_attr(v, '__iter__') and not isinstance(v, string_types) 25 | -------------------------------------------------------------------------------- /furl/compat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 4 | # furl - URL manipulation made simple. 
5 | # 6 | # Ansgar Grunseid 7 | # grunseid.com 8 | # grunseid@gmail.com 9 | # 10 | # License: Build Amazing Things (Unlicense) 11 | # 12 | 13 | import sys 14 | 15 | 16 | if sys.version_info[0] == 2: 17 | string_types = basestring # noqa 18 | else: 19 | string_types = (str, bytes) 20 | 21 | 22 | if list(sys.version_info[:2]) >= [2, 7]: 23 | from collections import OrderedDict # noqa 24 | else: 25 | from ordereddict import OrderedDict # noqa 26 | 27 | 28 | class UnicodeMixin(object): 29 | """ 30 | Mixin that defines proper __str__/__unicode__ methods in Python 2 or 3. 31 | """ 32 | if sys.version_info[0] >= 3: # Python 3 33 | def __str__(self): 34 | return self.__unicode__() 35 | else: # Python 2 36 | def __str__(self): 37 | return self.__unicode__().encode('utf8') 38 | -------------------------------------------------------------------------------- /furl/furl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 4 | # furl - URL manipulation made simple. 5 | # 6 | # Ansgar Grunseid 7 | # grunseid.com 8 | # grunseid@gmail.com 9 | # 10 | # License: Build Amazing Things (Unlicense) 11 | # 12 | 13 | import re 14 | import abc 15 | import warnings 16 | from copy import deepcopy 17 | from posixpath import normpath 18 | 19 | import six 20 | from six.moves import urllib 21 | from six.moves.urllib.parse import quote, unquote 22 | try: 23 | from icecream import ic 24 | except ImportError: # Graceful fallback if IceCream isn't installed. 25 | ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa 26 | 27 | from .omdict1D import omdict1D 28 | from .compat import string_types, UnicodeMixin 29 | from .common import ( 30 | callable_attr, is_iterable_but_not_string, absent as _absent) 31 | 32 | 33 | # Map of common protocols, as suggested by the common protocols included in 34 | # urllib/parse.py, to their default ports. Protocol scheme strings are 35 | # lowercase. 
#
# TODO(Ans): Is there a public map of schemes to their default ports? If not,
# create one? Best I (Ansgar) could find is
#
#   https://gist.github.com/mahmoud/2fe281a8daaff26cfe9c15d2c5bf5c8b
#
DEFAULT_PORTS = {
    'acap': 674,
    'afp': 548,
    'dict': 2628,
    'dns': 53,
    'ftp': 21,
    'git': 9418,
    'gopher': 70,
    'hdl': 2641,
    'http': 80,
    'https': 443,
    'imap': 143,
    'ipp': 631,
    'ipps': 631,
    'irc': 194,
    'ircs': 6697,
    'ldap': 389,
    'ldaps': 636,
    'mms': 1755,
    'msrp': 2855,
    'mtqp': 1038,
    'nfs': 111,
    'nntp': 119,
    'nntps': 563,
    'pop': 110,
    'prospero': 1525,
    'redis': 6379,
    'rsync': 873,
    'rtsp': 554,
    'rtsps': 322,
    'rtspu': 5005,
    'sftp': 22,
    'sip': 5060,
    'sips': 5061,
    'smb': 445,
    'snews': 563,
    'snmp': 161,
    'ssh': 22,
    'svn': 3690,
    'telnet': 23,
    'tftp': 69,
    'ventrilo': 3784,
    'vnc': 5900,
    'wais': 210,
    'ws': 80,
    'wss': 443,
    'xmpp': 5222,
}


def lget(lst, index, default=None):
    """
    Returns: lst[index], or <default> if that lookup raises an
    IndexError.
    """
    try:
        item = lst[index]
    except IndexError:
        item = default
    return item


def attemptstr(o):
    """
    Returns: str(o) if the conversion succeeds, otherwise <o> itself,
    unchanged.
    """
    try:
        text = str(o)
    except Exception:
        return o
    return text


def utf8(o, default=_absent):
    """
    Returns: o.encode('utf8') if that call succeeds. If it raises,
    <default> is returned when one was provided, otherwise <o> itself
    is returned unchanged.
    """
    try:
        encoded = o.encode('utf8')
    except Exception:
        # No explicit fallback provided: hand back the original object.
        encoded = o if default is _absent else default
    return encoded


def non_string_iterable(o):
    """
    Returns: True if <o> has a callable __iter__ attribute and is not an
    instance of string_types, False otherwise.
    """
    if not callable_attr(o, '__iter__'):
        return False
    return not isinstance(o, string_types)


# TODO(grun): Support IDNA2008 via the third party idna module. See
# https://github.com/gruns/furl/issues/73#issuecomment-226549755.
def idna_encode(o):
    """
    Returns: The IDNA encoding of <o> as a str if <o> has a callable
    encode() attribute, otherwise <o> itself, unchanged.
    """
    if callable_attr(o, 'encode'):
        return str(o.encode('idna').decode('utf8'))
    return o


def idna_decode(o):
    """
    Returns: The IDNA-decoded form of <o> if utf8(o) has a callable
    decode() attribute, otherwise <o> itself, unchanged.
    """
    if callable_attr(utf8(o), 'decode'):
        return utf8(o).decode('idna')
    return o


def is_valid_port(port):
    """
    Returns: True if str(port) consists only of digits and its integer
    value lies in the range 1-65535, inclusive. False otherwise.
    """
    port = str(port)
    if not port.isdigit():
        return False

    # str.isdigit() also accepts digit characters that int() cannot parse,
    # like the superscript '²'. Treat those as invalid ports instead of
    # letting int()'s ValueError propagate to the caller.
    try:
        number = int(port)
    except ValueError:
        return False

    return 0 < number <= 65535


def static_vars(**kwargs):
    """
    Decorator factory that sets each keyword argument as an attribute, of
    the same name, on the decorated function.

    Returns: A decorator that returns the function it's given after the
    attributes have been set on it.
    """
    def decorator(func):
        for key, value in kwargs.items():
            setattr(func, key, value)
        return func
    return decorator


def create_quote_fn(safe_charset, quote_plus):
    """
    Parameters:
      safe_charset: String of characters that may be exempted from
        percent-encoding.
      quote_plus: If True, every '%20' in the quoted output is replaced
        with '+'.

    Returns: A function quote_fn(s, dont_quote) that percent-encodes <s>
    with quote(). <dont_quote> selects the characters left unquoted: True
    for all of <safe_charset>, False for none, or a string of characters,
    of which only those also in <safe_charset> are honored.
    """
    def quote_fn(s, dont_quote):
        if dont_quote is True:
            safe = safe_charset
        elif dont_quote is False:
            safe = ''
        else:  # <dont_quote> is expected to be a string.
            safe = dont_quote

        # Prune duplicates and characters not in <safe_charset>.
        safe = ''.join(set(safe) & set(safe_charset))  # E.g. '?^#?' -> '?'.

        quoted = quote(s, safe)
        if quote_plus:
            quoted = quoted.replace('%20', '+')

        return quoted

    return quote_fn


#
# TODO(grun): Update some of the regex functions below to reflect the fact that
# the valid encoding of Path segments differs slightly from the valid encoding
# of Fragment Path segments. Similarly, the valid encodings of Query keys and
# values differ slightly from the valid encodings of Fragment Query keys and
# values.
#
# For example, '?' and '#' don't need to be encoded in Fragment Path segments
# but they must be encoded in Path segments. Similarly, '#' doesn't need to be
# encoded in Fragment Query keys and values, but must be encoded in Query keys
# and values.
178 | # 179 | # Perhaps merge them with URLPath, FragmentPath, URLQuery, and 180 | # FragmentQuery when those new classes are created (see the TODO 181 | # currently at the top of the source, 02/03/2012). 182 | # 183 | 184 | # RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) 185 | # 186 | # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 187 | # 188 | # pct-encoded = "%" HEXDIG HEXDIG 189 | # 190 | # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 191 | # / "*" / "+" / "," / ";" / "=" 192 | # 193 | # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 194 | # 195 | # === Path === 196 | # segment = *pchar 197 | # 198 | # === Query === 199 | # query = *( pchar / "/" / "?" ) 200 | # 201 | # === Scheme === 202 | # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 203 | # 204 | PERCENT_REGEX = r'\%[a-fA-F\d][a-fA-F\d]' 205 | INVALID_HOST_CHARS = '!@#$%^&\'\"*()+=:;/' 206 | 207 | 208 | @static_vars(regex=re.compile( 209 | r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;='), PERCENT_REGEX))) 210 | def is_valid_encoded_path_segment(segment): 211 | return is_valid_encoded_path_segment.regex.match(segment) is not None 212 | 213 | 214 | @static_vars(regex=re.compile( 215 | r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;/?'), PERCENT_REGEX))) 216 | def is_valid_encoded_query_key(key): 217 | return is_valid_encoded_query_key.regex.match(key) is not None 218 | 219 | 220 | @static_vars(regex=re.compile( 221 | r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;/?='), PERCENT_REGEX))) 222 | def is_valid_encoded_query_value(value): 223 | return is_valid_encoded_query_value.regex.match(value) is not None 224 | 225 | 226 | @static_vars(regex=re.compile(r'[a-zA-Z][a-zA-Z\-\.\+]*')) 227 | def is_valid_scheme(scheme): 228 | return is_valid_scheme.regex.match(scheme) is not None 229 | 230 | 231 | @static_vars(regex=re.compile('[%s]' % re.escape(INVALID_HOST_CHARS))) 232 | def is_valid_host(hostname): 233 | toks = hostname.split('.') 234 | if toks[-1] == '': # Trailing '.' 
in a fully qualified domain name. 235 | toks.pop() 236 | 237 | for tok in toks: 238 | if is_valid_host.regex.search(tok) is not None: 239 | return False 240 | 241 | return '' not in toks # Adjacent periods aren't allowed. 242 | 243 | 244 | def get_scheme(url): 245 | if url.startswith(':'): 246 | return '' 247 | 248 | # Avoid incorrect scheme extraction with url.find(':') when other URL 249 | # components, like the path, query, fragment, etc, may have a colon in 250 | # them. For example, the URL 'a?query:', whose query has a ':' in it. 251 | no_fragment = url.split('#', 1)[0] 252 | no_query = no_fragment.split('?', 1)[0] 253 | no_path_or_netloc = no_query.split('/', 1)[0] 254 | scheme = url[:max(0, no_path_or_netloc.find(':'))] or None 255 | 256 | if scheme is not None and not is_valid_scheme(scheme): 257 | return None 258 | 259 | return scheme 260 | 261 | 262 | def strip_scheme(url): 263 | scheme = get_scheme(url) or '' 264 | url = url[len(scheme):] 265 | if url.startswith(':'): 266 | url = url[1:] 267 | return url 268 | 269 | 270 | def set_scheme(url, scheme): 271 | after_scheme = strip_scheme(url) 272 | if scheme is None: 273 | return after_scheme 274 | else: 275 | return '%s:%s' % (scheme, after_scheme) 276 | 277 | 278 | # 'netloc' in Python parlance, 'authority' in RFC 3986 parlance. 279 | def has_netloc(url): 280 | scheme = get_scheme(url) 281 | return url.startswith('//' if scheme is None else scheme + '://') 282 | 283 | 284 | def urlsplit(url): 285 | """ 286 | Parameters: 287 | url: URL string to split. 288 | Returns: urlparse.SplitResult tuple subclass, just like 289 | urlparse.urlsplit() returns, with fields (scheme, netloc, path, 290 | query, fragment, username, password, hostname, port). See 291 | http://docs.python.org/library/urlparse.html#urlparse.urlsplit 292 | for more details on urlsplit(). 
293 | """ 294 | original_scheme = get_scheme(url) 295 | 296 | # urlsplit() parses URLs differently depending on whether or not the URL's 297 | # scheme is in any of 298 | # 299 | # urllib.parse.uses_fragment 300 | # urllib.parse.uses_netloc 301 | # urllib.parse.uses_params 302 | # urllib.parse.uses_query 303 | # urllib.parse.uses_relative 304 | # 305 | # For consistent URL parsing, switch the URL's scheme to 'http', a scheme 306 | # in all of the aforementioned uses_* lists, and afterwards revert to the 307 | # original scheme (which may or may not be in some, or all, of the the 308 | # uses_* lists). 309 | if original_scheme is not None: 310 | url = set_scheme(url, 'http') 311 | 312 | scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url) 313 | 314 | # Detect and preserve the '//' before the netloc, if present. E.g. preserve 315 | # URLs like 'http:', 'http://', and '///sup' correctly. 316 | after_scheme = strip_scheme(url) 317 | if after_scheme.startswith('//'): 318 | netloc = netloc or '' 319 | else: 320 | netloc = None 321 | 322 | scheme = original_scheme 323 | 324 | return urllib.parse.SplitResult(scheme, netloc, path, query, fragment) 325 | 326 | 327 | def urljoin(base, url): 328 | """ 329 | Parameters: 330 | base: Base URL to join with . 331 | url: Relative or absolute URL to join with . 332 | 333 | Returns: The resultant URL from joining and . 334 | """ 335 | base_scheme = get_scheme(base) if has_netloc(base) else None 336 | url_scheme = get_scheme(url) if has_netloc(url) else None 337 | 338 | if base_scheme is not None: 339 | # For consistent URL joining, switch the base URL's scheme to 340 | # 'http'. urllib.parse.urljoin() behaves differently depending on the 341 | # scheme. E.g. 
342 | # 343 | # >>> urllib.parse.urljoin('http://google.com/', 'hi') 344 | # 'http://google.com/hi' 345 | # 346 | # vs 347 | # 348 | # >>> urllib.parse.urljoin('asdf://google.com/', 'hi') 349 | # 'hi' 350 | root = set_scheme(base, 'http') 351 | else: 352 | root = base 353 | 354 | joined = urllib.parse.urljoin(root, url) 355 | 356 | new_scheme = url_scheme if url_scheme is not None else base_scheme 357 | if new_scheme is not None and has_netloc(joined): 358 | joined = set_scheme(joined, new_scheme) 359 | 360 | return joined 361 | 362 | 363 | def join_path_segments(*args): 364 | """ 365 | Join multiple lists of path segments together, intelligently 366 | handling path segments borders to preserve intended slashes of the 367 | final constructed path. 368 | 369 | This function is not encoding aware. It doesn't test for, or change, 370 | the encoding of path segments it is passed. 371 | 372 | Examples: 373 | join_path_segments(['a'], ['b']) == ['a','b'] 374 | join_path_segments(['a',''], ['b']) == ['a','b'] 375 | join_path_segments(['a'], ['','b']) == ['a','b'] 376 | join_path_segments(['a',''], ['','b']) == ['a','','b'] 377 | join_path_segments(['a','b'], ['c','d']) == ['a','b','c','d'] 378 | 379 | Returns: A list containing the joined path segments. 
380 | """ 381 | finals = [] 382 | 383 | for segments in args: 384 | if not segments or segments == ['']: 385 | continue 386 | elif not finals: 387 | finals.extend(segments) 388 | else: 389 | # Example #1: ['a',''] + ['b'] == ['a','b'] 390 | # Example #2: ['a',''] + ['','b'] == ['a','','b'] 391 | if finals[-1] == '' and (segments[0] != '' or len(segments) > 1): 392 | finals.pop(-1) 393 | # Example: ['a'] + ['','b'] == ['a','b'] 394 | elif finals[-1] != '' and segments[0] == '' and len(segments) > 1: 395 | segments = segments[1:] 396 | finals.extend(segments) 397 | 398 | return finals 399 | 400 | 401 | def remove_path_segments(segments, remove): 402 | """ 403 | Removes the path segments of from the end of the path 404 | segments . 405 | 406 | Examples: 407 | # ('/a/b/c', 'b/c') -> '/a/' 408 | remove_path_segments(['','a','b','c'], ['b','c']) == ['','a',''] 409 | # ('/a/b/c', '/b/c') -> '/a' 410 | remove_path_segments(['','a','b','c'], ['','b','c']) == ['','a'] 411 | 412 | Returns: The list of all remaining path segments after the segments 413 | in have been removed from the end of . If no 414 | segments from were removed from , is 415 | returned unmodified. 416 | """ 417 | # [''] means a '/', which is properly represented by ['', '']. 
418 | if segments == ['']: 419 | segments.append('') 420 | if remove == ['']: 421 | remove.append('') 422 | 423 | ret = None 424 | if remove == segments: 425 | ret = [] 426 | elif len(remove) > len(segments): 427 | ret = segments 428 | else: 429 | toremove = list(remove) 430 | 431 | if len(remove) > 1 and remove[0] == '': 432 | toremove.pop(0) 433 | 434 | if toremove and toremove == segments[-1 * len(toremove):]: 435 | ret = segments[:len(segments) - len(toremove)] 436 | if remove[0] != '' and ret: 437 | ret.append('') 438 | else: 439 | ret = segments 440 | 441 | return ret 442 | 443 | 444 | def quacks_like_a_path_with_segments(obj): 445 | return ( 446 | hasattr(obj, 'segments') and 447 | is_iterable_but_not_string(obj.segments)) 448 | 449 | 450 | class Path(object): 451 | 452 | """ 453 | Represents a path comprised of zero or more path segments. 454 | 455 | http://tools.ietf.org/html/rfc3986#section-3.3 456 | 457 | Path parameters aren't supported. 458 | 459 | Attributes: 460 | _force_absolute: Function whos boolean return value specifies 461 | whether self.isabsolute should be forced to True or not. If 462 | _force_absolute(self) returns True, isabsolute is read only and 463 | raises an AttributeError if assigned to. If 464 | _force_absolute(self) returns False, isabsolute is mutable and 465 | can be set to True or False. URL paths use _force_absolute and 466 | return True if the netloc is non-empty (not equal to 467 | ''). Fragment paths are never read-only and their 468 | _force_absolute(self) always returns False. 469 | segments: List of zero or more path segments comprising this 470 | path. If the path string has a trailing '/', the last segment 471 | will be '' and self.isdir will be True and self.isfile will be 472 | False. An empty segment list represents an empty path, not '/' 473 | (though they have the same meaning). 474 | isabsolute: Boolean whether or not this is an absolute path or 475 | not. An absolute path starts with a '/'. 
self.isabsolute is 476 | False if the path is empty (self.segments == [] and str(path) == 477 | ''). 478 | strict: Boolean whether or not UserWarnings should be raised if 479 | improperly encoded path strings are provided to methods that 480 | take such strings, like load(), add(), set(), remove(), etc. 481 | """ 482 | 483 | # From RFC 3986: 484 | # segment = *pchar 485 | # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 486 | # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 487 | # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 488 | # / "*" / "+" / "," / ";" / "=" 489 | SAFE_SEGMENT_CHARS = ":@-._~!$&'()*+,;=" 490 | 491 | def __init__(self, path='', force_absolute=lambda _: False, strict=False): 492 | self.segments = [] 493 | 494 | self.strict = strict 495 | self._isabsolute = False 496 | self._force_absolute = force_absolute 497 | 498 | self.load(path) 499 | 500 | def load(self, path): 501 | """ 502 | Load , replacing any existing path. can either be 503 | a Path instance, a list of segments, a path string to adopt. 504 | 505 | Returns: . 506 | """ 507 | if not path: 508 | segments = [] 509 | elif quacks_like_a_path_with_segments(path): # Path interface. 510 | segments = path.segments 511 | elif is_iterable_but_not_string(path): # List interface. 512 | segments = path 513 | else: # String interface. 514 | segments = self._segments_from_path(path) 515 | 516 | if self._force_absolute(self): 517 | self._isabsolute = True if segments else False 518 | else: 519 | self._isabsolute = (segments and segments[0] == '') 520 | 521 | if self.isabsolute and len(segments) > 1 and segments[0] == '': 522 | segments.pop(0) 523 | 524 | self.segments = segments 525 | 526 | return self 527 | 528 | def add(self, path): 529 | """ 530 | Add to the existing path. can either be a Path instance, 531 | a list of segments, or a path string to append to the existing path. 532 | 533 | Returns: . 534 | """ 535 | if quacks_like_a_path_with_segments(path): # Path interface. 
536 | newsegments = path.segments 537 | elif is_iterable_but_not_string(path): # List interface. 538 | newsegments = path 539 | else: # String interface. 540 | newsegments = self._segments_from_path(path) 541 | 542 | # Preserve the opening '/' if one exists already (self.segments 543 | # == ['']). 544 | if self.segments == [''] and newsegments and newsegments[0] != '': 545 | newsegments.insert(0, '') 546 | 547 | segments = self.segments 548 | if self.isabsolute and self.segments and self.segments[0] != '': 549 | segments.insert(0, '') 550 | 551 | self.load(join_path_segments(segments, newsegments)) 552 | 553 | return self 554 | 555 | def set(self, path): 556 | self.load(path) 557 | return self 558 | 559 | def remove(self, path): 560 | if path is True: 561 | self.load('') 562 | else: 563 | if is_iterable_but_not_string(path): # List interface. 564 | segments = path 565 | else: # String interface. 566 | segments = self._segments_from_path(path) 567 | base = ([''] if self.isabsolute else []) + self.segments 568 | self.load(remove_path_segments(base, segments)) 569 | 570 | return self 571 | 572 | def normalize(self): 573 | """ 574 | Normalize the path. Turn '//a/./b/../c//' into '/a/c/'. 575 | 576 | Returns: . 577 | """ 578 | if str(self): 579 | normalized = normpath(str(self)) + ('/' * self.isdir) 580 | if normalized.startswith('//'): # http://bugs.python.org/636648 581 | normalized = '/' + normalized.lstrip('/') 582 | self.load(normalized) 583 | 584 | return self 585 | 586 | def asdict(self): 587 | return { 588 | 'encoded': str(self), 589 | 'isdir': self.isdir, 590 | 'isfile': self.isfile, 591 | 'segments': self.segments, 592 | 'isabsolute': self.isabsolute, 593 | } 594 | 595 | @property 596 | def isabsolute(self): 597 | if self._force_absolute(self): 598 | return True 599 | return self._isabsolute 600 | 601 | @isabsolute.setter 602 | def isabsolute(self, isabsolute): 603 | """ 604 | Raises: AttributeError if _force_absolute(self) returns True. 
def _segments_from_path(self, path):
    """
    Split the path string <path> on '/' and return its segments,
    percent-decoded.

    Segments that are not validly percent-encoded are percent-encoded
    first, so the final unquote() returns them unchanged.

    Returns: The list of unquoted path segments from <path>.

    Raises: UserWarning, once per improperly encoded segment, if
    self.strict is True. The warning suggests the correctly encoded
    form of the entire path.

    TODO(grun): Accept both list values and string values and
    refactor the list vs string interface testing to this common
    method.
    """
    segments = []
    num_invalid = 0
    for segment in path.split('/'):
        if not is_valid_encoded_path_segment(segment):
            segment = quote(utf8(segment))
            num_invalid += 1
        segments.append(utf8(segment))

    # In Python 3, utf8() returns Bytes objects that must be decoded into
    # strings before they can be passed to unquote(). In Python 2, utf8()
    # returns strings that can be passed directly to urllib.unquote().
    segments = [
        segment.decode('utf8')
        if isinstance(segment, bytes) and not isinstance(segment, str)
        else segment for segment in segments]
    unquoted = [unquote(segment) for segment in segments]

    if self.strict and num_invalid:
        # Build the suggestion from the complete, decoded segment list.
        # Building it mid-loop from the segments seen so far produced a
        # truncated (and possibly empty) suggestion.
        s = ("Improperly encoded path string received: '%s'. "
             "Proceeding, but did you mean '%s'?" %
             (path, self._path_from_segments(unquoted)))
        # Keep emitting one warning per improperly encoded segment.
        for _ in range(num_invalid):
            warnings.warn(s, UserWarning)

    return unquoted
There ' 729 | 'should be one, and preferably only one, obvious way to serialize' 730 | ' a Path object to a string.') 731 | warnings.warn(s, DeprecationWarning) 732 | return str(self._path) 733 | 734 | @abc.abstractmethod 735 | def _force_absolute(self, path): 736 | """ 737 | Subclass me. 738 | """ 739 | pass 740 | 741 | def __setattr__(self, attr, value): 742 | """ 743 | Returns: True if this attribute is handled and set here, False 744 | otherwise. 745 | """ 746 | if attr == '_path': 747 | self.__dict__[attr] = value 748 | return True 749 | elif attr == 'path': 750 | self._path.load(value) 751 | return True 752 | return False 753 | 754 | 755 | @six.add_metaclass(abc.ABCMeta) 756 | class URLPathCompositionInterface(PathCompositionInterface): 757 | 758 | """ 759 | Abstract class interface for a parent class that contains a URL 760 | Path. 761 | 762 | A URL path's isabsolute attribute is absolute and read-only if a 763 | netloc is defined. A path cannot start without '/' if there's a 764 | netloc. For example, the URL 'http://google.coma/path' makes no 765 | sense. It should be 'http://google.com/a/path'. 766 | 767 | A URL path's isabsolute attribute is mutable if there's no 768 | netloc. The scheme doesn't matter. For example, the isabsolute 769 | attribute of the URL path in 'mailto:user@host.com', with scheme 770 | 'mailto' and path 'user@host.com', is mutable because there is no 771 | netloc. See 772 | 773 | http://en.wikipedia.org/wiki/URI_scheme#Examples 774 | """ 775 | 776 | def __init__(self, strict=False): 777 | PathCompositionInterface.__init__(self, strict=strict) 778 | 779 | def _force_absolute(self, path): 780 | return bool(path) and self.netloc 781 | 782 | 783 | @six.add_metaclass(abc.ABCMeta) 784 | class FragmentPathCompositionInterface(PathCompositionInterface): 785 | 786 | """ 787 | Abstract class interface for a parent class that contains a Fragment 788 | Path. 
class Query(object):

    """
    Represents a URL query comprised of zero or more unique parameters
    and their respective values.

      http://tools.ietf.org/html/rfc3986#section-3.4


    All interaction with Query.params is done with unquoted strings. So

      f.query.params['a'] = 'a%5E'

    means the intended value for 'a' is 'a%5E', not 'a^'.


    Query.params is implemented as an omdict1D object - a one
    dimensional ordered multivalue dictionary. This provides support for
    repeated URL parameters, like 'a=1&a=2'. omdict1D is a subclass of
    omdict, an ordered multivalue dictionary. Documentation for omdict
    can be found here

      https://github.com/gruns/orderedmultidict

    The one dimensional aspect of omdict1D means that a list of values
    is interpreted as multiple values, not a single value which is
    itself a list of values. This is a reasonable distinction to make
    because URL query parameters are one dimensional: query parameter
    values cannot themselves be composed of sub-values.

    So what does this mean? This means we can safely interpret

      f = furl('http://www.google.com')
      f.query.params['arg'] = ['one', 'two', 'three']

    as three different values for 'arg': 'one', 'two', and 'three',
    instead of a single value which is itself some serialization of the
    python list ['one', 'two', 'three']. Thus, the result of the above
    will be

      f.query.allitems() == [
        ('arg','one'), ('arg','two'), ('arg','three')]

    and not

      f.query.allitems() == [('arg', ['one', 'two', 'three'])]

    The latter doesn't make sense because query parameter values cannot
    be composed of sub-values. So finally

      str(f.query) == 'arg=one&arg=two&arg=three'


    Additionally, while the set of allowed characters in URL queries is
    defined in RFC 3986 section 3.4, the format for encoding key=value
    pairs within the query is not. In turn, the parsing of encoded
    key=value query pairs differs between implementations.

    As a compromise to support equal signs in both key=value pair
    encoded queries, like

      https://www.google.com?a=1&b=2

    and non-key=value pair encoded queries, like

      https://www.google.com?===3===

    equal signs are percent encoded in key=value pairs where the key is
    non-empty, e.g.

      https://www.google.com?equal-sign=%3D

    but not encoded in key=value pairs where the key is empty, e.g.

      https://www.google.com?===equal=sign===

    This presents a reasonable compromise to accurately reproduce
    non-key=value queries with equal signs while also still percent
    encoding equal signs in key=value pair encoded queries, as
    expected. See

      https://github.com/gruns/furl/issues/99

    for more details.

    Attributes:
      params: Ordered multivalue dictionary of query parameter key:value
        pairs. Parameters in self.params are maintained URL decoded,
        e.g. 'a b' not 'a+b'.
      strict: Boolean whether or not UserWarnings should be raised if
        improperly encoded query strings are provided to methods that
        take such strings, like load(), add(), set(), remove(), etc.
    """

    # From RFC 3986:
    #   query       = *( pchar / "/" / "?" )
    #   pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
    #   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
    #   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
    #                 / "*" / "+" / "," / ";" / "="
    SAFE_KEY_CHARS = "/?:@-._~!$'()*+,;"
    SAFE_VALUE_CHARS = SAFE_KEY_CHARS + '='

    def __init__(self, query='', strict=False):
        """
        Params:
          query: Initial query, in any form accepted by load().
          strict: See the <strict> attribute in the class docstring.
        """
        self.strict = strict

        self._params = omdict1D()

        self.load(query)

    def load(self, query):
        """
        Replace the current parameters with those extracted from
        <query> by _items().

        Returns: <self>.
        """
        items = self._items(query)
        self.params.load(items)
        return self

    def add(self, args):
        """
        Append every key:value item extracted from <args> by _items()
        to the existing parameters.

        Returns: <self>.
        """
        for param, value in self._items(args):
            self.params.add(param, value)
        return self

    def set(self, mapping):
        """
        Adopt all mappings in <mapping>, replacing any existing mappings
        with the same key. If a key has multiple values in <mapping>,
        they are all adopted.

        Examples:
          Query({1:1}).set([(1,None),(2,2)]).params.allitems()
            == [(1,None),(2,2)]
          Query({1:None,2:None}).set([(1,1),(2,2),(1,11)]).params.allitems()
            == [(1,1),(2,2),(1,11)]
          Query({1:None}).set([(1,[1,11,111])]).params.allitems()
            == [(1,1),(1,11),(1,111)]

        Returns: <self>.
        """
        self.params.updateall(mapping)
        return self

    def remove(self, query):
        """
        Remove parameters. <query> can be

          - True, to remove every parameter.
          - A single key, to remove all values of that key.
          - A dictionary-like object (anything with a callable items
            attribute), whose key:value items are removed.
          - A non-string iterable of keys and/or two-item (key, value)
            pairs. A pair removes only that value of the key; a bare
            key removes all of the key's values.

        Keys and values that aren't present are ignored.

        Returns: <self>.
        """
        if query is True:
            self.load('')
            return self

        # Single key to remove.
        items = [query]
        # Dictionary or multivalue dictionary of items to remove.
        if callable_attr(query, 'items'):
            items = self._items(query)
        # List of keys or items to remove.
        elif non_string_iterable(query):
            items = query

        for item in items:
            if non_string_iterable(item) and len(item) == 2:
                key, value = item
                self.params.popvalue(key, value, None)
            else:
                key = item
                self.params.pop(key, None)

        return self

    @property
    def params(self):
        return self._params

    @params.setter
    def params(self, params):
        # Extract the new items before clearing, so self._params is left
        # untouched if _items() raises.
        items = self._items(params)

        self._params.clear()
        for key, value in items:
            self._params.add(key, value)

    def encode(self, delimiter='&', quote_plus=True, dont_quote='',
               delimeter=_absent):
        """
        Examples:

          Query('a=a&b=#').encode() == 'a=a&b=%23'
          Query('a=a&b=#').encode(';') == 'a=a;b=%23'
          Query('a+b=c@d').encode(dont_quote='@') == 'a+b=c@d'
          Query('a+b=c@d').encode(quote_plus=False) == 'a%20b=c%40d'

        Until furl v0.4.6, the 'delimiter' argument was incorrectly
        spelled 'delimeter'. For backwards compatibility, accept both
        the correct 'delimiter' and the old, misspelled 'delimeter'.

        Keys and values are encoded application/x-www-form-urlencoded if
        <quote_plus> is True, percent-encoded otherwise.

        <dont_quote> exempts valid query characters from being
        percent-encoded, either in their entirety with dont_quote=True,
        or selectively with dont_quote=<string>, like
        dont_quote='/?@_'. Invalid query characters -- those not in
        self.SAFE_KEY_CHARS, like '#' and '^' -- are always encoded,
        even if included in <dont_quote>. For example:

          Query('#=^').encode(dont_quote='#^') == '%23=%5E'.

        Returns: A URL encoded query string using <delimiter> as the
        delimiter separating key:value pairs. The most common and
        default delimiter is '&', but ';' can also be specified. ';' is
        W3C recommended.
        """
        if delimeter is not _absent:
            delimiter = delimeter

        quote_key = create_quote_fn(self.SAFE_KEY_CHARS, quote_plus)
        quote_value = create_quote_fn(self.SAFE_VALUE_CHARS, quote_plus)

        pairs = []
        for key, value in self.params.iterallitems():
            utf8key = utf8(key, utf8(attemptstr(key)))
            quoted_key = quote_key(utf8key, dont_quote)

            if value is None:  # Example: http://sprop.su/?key.
                pair = quoted_key
            else:  # Example: http://sprop.su/?key=value.
                utf8value = utf8(value, utf8(attemptstr(value)))
                quoted_value = quote_value(utf8value, dont_quote)

                if not quoted_key:  # Unquote '=' to allow queries like '?==='.
                    quoted_value = quoted_value.replace('%3D', '=')

                pair = '%s=%s' % (quoted_key, quoted_value)

            pairs.append(pair)

        query = delimiter.join(pairs)

        return query

    def asdict(self):
        return {
            'encoded': str(self),
            'params': self.params.allitems(),
        }

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        return len(self.params) > 0
    __nonzero__ = __bool__

    def __str__(self):
        return self.encode()

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, str(self))

    def _items(self, items):
        """
        Extract and return the key:value items from various
        containers. Some containers that could hold key:value items are

          - List of (key,value) tuples.
          - Dictionaries of key:value items.
          - Multivalue dictionary of key:value items, with potentially
            repeated keys.
          - Query string with encoded params and values.

        Keys and values are passed through unmodified unless they were
        passed in within an encoded query string, like
        'a=a%20a&b=b'. Keys and values passed in within an encoded query
        string are unquoted by _extract_items_from_querystr().

        Returns: List of items as (key, value) tuples. Keys and values
        are passed through unmodified unless they were passed in as part
        of an encoded query string, in which case the final keys and
        values that are returned will be unquoted.

        Raises: UserWarning if <items> is an improperly encoded query
        string and self.strict is True.
        """
        if not items:
            items = []
        # Multivalue Dictionary-like interface. e.g. {'a':1, 'a':2,
        # 'b':2}
        elif callable_attr(items, 'allitems'):
            items = list(items.allitems())
        elif callable_attr(items, 'iterallitems'):
            items = list(items.iterallitems())
        # Dictionary-like interface. e.g. {'a':1, 'b':2, 'c':3}
        elif callable_attr(items, 'items'):
            items = list(items.items())
        elif callable_attr(items, 'iteritems'):
            items = list(items.iteritems())
        # Encoded query string. e.g. 'a=1&b=2&c=3'
        elif isinstance(items, six.string_types):
            items = self._extract_items_from_querystr(items)
        # Default to list of key:value items interface. e.g. [('a','1'),
        # ('b','2')]
        else:
            items = list(items)

        return items

    def _extract_items_from_querystr(self, querystr):
        """
        Split the encoded query string <querystr> on '&' and return its
        decoded (key, value) items. '+' is decoded to ' '. A pair
        without an '=', like 'sup' in '?sup', yields a value of None.

        Raises: UserWarning, once per improperly encoded pair, if
        self.strict is True.
        """
        items = []

        pairstrs = querystr.split('&')
        pairs = [item.split('=', 1) for item in pairstrs]
        pairs = [(p[0], lget(p, 1, '')) for p in pairs]  # Pad with value ''.

        # The "did you mean" suggestion is the same for every malformed
        # pair, so build it lazily and at most once.
        suggestion = None

        for pairstr, (key, value) in six.moves.zip(pairstrs, pairs):
            valid_key = is_valid_encoded_query_key(key)
            valid_value = is_valid_encoded_query_value(value)
            if self.strict and (not valid_key or not valid_value):
                if suggestion is None:
                    suggestion = urllib.parse.urlencode(pairs)
                msg = (
                    "Incorrectly percent encoded query string received: '%s'. "
                    "Proceeding, but did you mean '%s'?" %
                    (querystr, suggestion))
                warnings.warn(msg, UserWarning)

            key_decoded = unquote(key.replace('+', ' '))
            # Empty value without a '=', e.g. '?sup'.
            if key == pairstr:
                value_decoded = None
            else:
                value_decoded = unquote(value.replace('+', ' '))

            items.append((key_decoded, value_decoded))

        return items
class Fragment(FragmentPathCompositionInterface, QueryCompositionInterface):

    """
    Represents a URL fragment, comprised internally of a Path and Query
    optionally separated by a '?' character.

      http://tools.ietf.org/html/rfc3986#section-3.5

    Attributes:
      path: Path object from FragmentPathCompositionInterface.
      query: Query object from QueryCompositionInterface.
      separator: Boolean whether or not a '?' separator should be
        included in the string representation of this fragment. When
        False, a '?' character will not separate the fragment path from
        the fragment query in the fragment string. This is useful to
        build fragments like '#!arg1=val1&arg2=val2', where no
        separating '?' is desired.
    """

    def __init__(self, fragment='', strict=False):
        FragmentPathCompositionInterface.__init__(self, strict=strict)
        QueryCompositionInterface.__init__(self, strict=strict)
        self.strict = strict
        self.separator = True

        self.load(fragment)

    def load(self, fragment):
        """
        Parse the fragment string <fragment> into this fragment's path
        and query, replacing both. None is treated as ''.

        Returns: <self>.
        """
        self.path.load('')
        self.query.load('')

        if fragment is None:
            fragment = ''

        # str.split() always returns at least one token, so there are
        # only two cases: no '?' (one token) or a '?' (two tokens).
        toks = fragment.split('?', 1)
        if len(toks) == 1:
            # Does this fragment look like a path or a query? Default to
            # path.
            if '=' in fragment:  # Query example: '#woofs=dogs'.
                self._query.load(fragment)
            else:  # Path example: '#supinthisthread'.
                self._path.load(fragment)
        # Does toks[1] actually look like a query? Like 'a=a' or
        # 'a=' or '=a'?
        elif '=' in toks[1]:
            self._path.load(toks[0])
            self._query.load(toks[1])
        # If toks[1] doesn't look like a query, the user probably
        # provided a fragment string like 'a?b?' that was intended
        # to be adopted as-is, not a two part fragment with path 'a'
        # and query 'b?'.
        else:
            self._path.load(fragment)

        return self

    def add(self, path=_absent, args=_absent):
        """
        Add <path> to the fragment's path and/or <args> to the
        fragment's query. Omitted parameters are left alone.

        Returns: <self>.
        """
        if path is not _absent:
            self.path.add(path)
        if args is not _absent:
            self.query.add(args)

        return self

    def set(self, path=_absent, args=_absent, separator=_absent):
        """
        Replace the fragment's path with <path> and/or its query with
        <args>. <separator> is adopted only if it is exactly True or
        False. Omitted parameters are left alone.

        Returns: <self>.
        """
        if path is not _absent:
            self.path.load(path)
        if args is not _absent:
            self.query.load(args)
        if separator is True or separator is False:
            self.separator = separator

        return self

    def remove(self, fragment=_absent, path=_absent, args=_absent):
        """
        Remove the whole fragment if <fragment> is True, and/or remove
        <path> from the fragment's path and <args> from the fragment's
        query.

        Returns: <self>.
        """
        if fragment is True:
            self.load('')
        if path is not _absent:
            self.path.remove(path)
        if args is not _absent:
            self.query.remove(args)

        return self

    def asdict(self):
        return {
            'encoded': str(self),
            'separator': self.separator,
            'path': self.path.asdict(),
            'query': self.query.asdict(),
        }

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return not self == other

    def __setattr__(self, attr, value):
        # Give each composition interface a chance to handle the
        # attribute. Fall back to a plain attribute assignment if
        # neither does.
        if (not PathCompositionInterface.__setattr__(self, attr, value) and
                not QueryCompositionInterface.__setattr__(self, attr, value)):
            object.__setattr__(self, attr, value)

    def __bool__(self):
        return bool(self.path) or bool(self.query)
    __nonzero__ = __bool__

    def __str__(self):
        path, query = str(self._path), str(self._query)

        # If there is no query or self.separator is False, decode all
        # '?' characters in the path from their percent encoded form
        # '%3F' to '?'. This allows for fragment strings containing '?'s,
        # like '#dog?machine?yes'.
        if path and (not query or not self.separator):
            path = path.replace('%3F', '?')

        separator = '?' if path and query and self.separator else ''

        return path + separator + query

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, str(self))
1343 | 1344 | scheme://username:password@host:port/path?query#fragment 1345 | 1346 | Attributes: 1347 | strict: Boolean whether or not UserWarnings should be raised if 1348 | improperly encoded path, query, or fragment strings are provided 1349 | to methods that take such strings, like load(), add(), set(), 1350 | remove(), etc. 1351 | username: Username string for authentication. Initially None. 1352 | password: Password string for authentication with 1353 | . Initially None. 1354 | scheme: URL scheme. A string ('http', 'https', '', etc) or None. 1355 | All lowercase. Initially None. 1356 | host: URL host (hostname, IPv4 address, or IPv6 address), not 1357 | including port. All lowercase. Initially None. 1358 | port: Port. Valid port values are 1-65535, or None meaning no port 1359 | specified. 1360 | netloc: Network location. Combined host and port string. Initially 1361 | None. 1362 | path: Path object from URLPathCompositionInterface. 1363 | query: Query object from QueryCompositionInterface. 1364 | fragment: Fragment object from FragmentCompositionInterface. 1365 | """ 1366 | 1367 | def __init__(self, url='', args=_absent, path=_absent, fragment=_absent, 1368 | scheme=_absent, netloc=_absent, origin=_absent, 1369 | fragment_path=_absent, fragment_args=_absent, 1370 | fragment_separator=_absent, host=_absent, port=_absent, 1371 | query=_absent, query_params=_absent, username=_absent, 1372 | password=_absent, strict=False): 1373 | """ 1374 | Raises: ValueError on invalid URL or invalid URL component(s) provided. 1375 | """ 1376 | URLPathCompositionInterface.__init__(self, strict=strict) 1377 | QueryCompositionInterface.__init__(self, strict=strict) 1378 | FragmentCompositionInterface.__init__(self, strict=strict) 1379 | self.strict = strict 1380 | 1381 | self.load(url) # Raises ValueError on invalid URL. 1382 | self.set( # Raises ValueError on invalid URL component(s). 
def load(self, url):
    """
    Reset this furl and adopt every component of <url>. None is
    treated as '' and non-string objects are converted with str().

    Returns: <self>.

    Raises: ValueError on invalid URL, like a malformed IPv6 address
    or invalid port.
    """
    # Forget the previous URL, in the same order as before.
    for attr in ('username', 'password', '_host', '_port', '_scheme'):
        setattr(self, attr, None)

    if url is None:
        url = ''
    elif not isinstance(url, six.string_types):
        url = str(url)

    # urlsplit() raises a ValueError on malformed IPv6 addresses in
    # Python 2.7+.
    parts = urlsplit(url)

    self.netloc = parts.netloc  # Raises ValueError in Python 2.7+.
    self.scheme = parts.scheme
    if not self.port:
        # No explicit port: use the scheme's default port, if any.
        self._port = DEFAULT_PORTS.get(self.scheme)
    self.path.load(parts.path)
    self.query.load(parts.query)
    self.fragment.load(parts.fragment)

    return self
@property
def port(self):
    """
    Returns: The explicitly assigned port if there is one, otherwise
    the default port for self.scheme from DEFAULT_PORTS, or None if
    the scheme has no default port.
    """
    return self._port or DEFAULT_PORTS.get(self.scheme)

@port.setter
def port(self, port):
    """
    The port value can be 1-65535 or None, meaning no port specified. If
    <port> is None and self.scheme is a known scheme in DEFAULT_PORTS,
    the default port value from DEFAULT_PORTS will be used.

    Raises: ValueError on invalid port.
    """
    if port is None:
        self._port = DEFAULT_PORTS.get(self.scheme)
    elif is_valid_port(port):
        self._port = int(str(port))
    else:
        # Wrap <port> in a tuple so a tuple-valued <port> is formatted
        # as a single value, not unpacked as format arguments (which
        # raised TypeError instead of the documented ValueError).
        raise ValueError("Invalid port '%s'." % (port,))
@property
def origin(self):
    """
    Returns: The origin string '<scheme>://<host>[:<port>]'. The port
    is included only if it is set and differs from the default port of
    self.scheme. A missing scheme or host is rendered as ''.
    """
    port = ''
    scheme = self.scheme or ''
    host = idna_encode(self.host) or ''
    if self.port and self.port != DEFAULT_PORTS.get(self.scheme):
        port = ':%s' % self.port
    origin = '%s://%s%s' % (scheme, host, port)

    return origin

@origin.setter
def origin(self, origin):
    """
    Adopt the scheme, host, and port in <origin>, like
    'http://example.com:8080' or 'http://[::1]:8080'. The scheme is
    changed only if <origin> contains '://', and the port only if
    <origin> includes one. None clears both the scheme and the netloc.

    Raises: Whatever the scheme, host, and port setters raise on
    invalid values.
    """
    if origin is None:
        self.scheme = self.netloc = None
        return

    toks = origin.split('://', 1)
    if len(toks) == 1:
        host_port = origin
    else:
        self.scheme, host_port = toks

    bracketpos = host_port.rfind(']')
    if bracketpos != -1:
        # Bracketed IPv6 literal, like '[::1]' or '[::1]:8080'. The
        # colons inside the brackets belong to the address; only a ':'
        # immediately after the closing ']' introduces a port.
        host, rest = host_port[:bracketpos + 1], host_port[bracketpos + 1:]
        if rest.startswith(':'):
            self.host, self.port = host, rest[1:]
        else:
            # No port. Pass the string through untouched so the host
            # setter validates any junk trailing the ']'.
            self.host = host_port
    elif ':' in host_port:
        self.host, self.port = host_port.split(':', 1)
    else:
        self.host = host_port
1594 | fragment_path: A list of path segments to add to the existing 1595 | fragment path segments, or a path string to join with the 1596 | existing fragment path string. 1597 | fragment_args: A dictionary of query keys and values or list 1598 | of key:value items to add to the fragment's query. 1599 | 1600 | Returns: . 1601 | 1602 | Raises: UserWarning if redundant and possibly conflicting and 1603 | were provided. 1604 | """ 1605 | if args is not _absent and query_params is not _absent: 1606 | s = ('Both and provided to furl.add(). ' 1607 | ' is a shortcut for , not to be used ' 1608 | 'with . See furl.add() documentation for more ' 1609 | 'details.') 1610 | warnings.warn(s, UserWarning) 1611 | 1612 | if path is not _absent: 1613 | self.path.add(path) 1614 | if args is not _absent: 1615 | self.query.add(args) 1616 | if query_params is not _absent: 1617 | self.query.add(query_params) 1618 | if fragment_path is not _absent or fragment_args is not _absent: 1619 | self.fragment.add(path=fragment_path, args=fragment_args) 1620 | 1621 | return self 1622 | 1623 | def set(self, args=_absent, path=_absent, fragment=_absent, query=_absent, 1624 | scheme=_absent, username=_absent, password=_absent, host=_absent, 1625 | port=_absent, netloc=_absent, origin=_absent, query_params=_absent, 1626 | fragment_path=_absent, fragment_args=_absent, 1627 | fragment_separator=_absent): 1628 | """ 1629 | Set components of a url and return this furl instance, . 1630 | 1631 | If any overlapping, and hence possibly conflicting, parameters 1632 | are provided, appropriate UserWarning's will be raised. The 1633 | groups of parameters that could potentially overlap are 1634 | 1635 | and 1636 | , , and/or ( or ) 1637 | and ( and/or ) 1638 | any two or all of , , and/or 1639 | 1640 | In all of the above groups, the latter parameter(s) take 1641 | precedence over the earlier parameter(s). 
So, for example 1642 | 1643 | furl('http://google.com/').set( 1644 | netloc='yahoo.com:99', host='bing.com', port=40) 1645 | 1646 | will result in a UserWarning being raised and the url becoming 1647 | 1648 | 'http://bing.com:40/' 1649 | 1650 | not 1651 | 1652 | 'http://yahoo.com:99/ 1653 | 1654 | Parameters: 1655 | args: Shortcut for . 1656 | path: A list of path segments or a path string to adopt. 1657 | fragment: Fragment string to adopt. 1658 | scheme: Scheme string to adopt. 1659 | netloc: Network location string to adopt. 1660 | origin: Scheme and netloc. 1661 | query: Query string to adopt. 1662 | query_params: A dictionary of query keys and values or list of 1663 | key:value items to adopt. 1664 | fragment_path: A list of path segments to adopt for the 1665 | fragment's path or a path string to adopt as the fragment's 1666 | path. 1667 | fragment_args: A dictionary of query keys and values or list 1668 | of key:value items for the fragment's query to adopt. 1669 | fragment_separator: Boolean whether or not there should be a 1670 | '?' separator between the fragment path and fragment query. 1671 | host: Host string to adopt. 1672 | port: Port number to adopt. 1673 | username: Username string to adopt. 1674 | password: Password string to adopt. 1675 | Raises: 1676 | ValueError on invalid port. 1677 | UserWarning if and are provided. 1678 | UserWarning if , and/or ( and/or ) are 1679 | provided. 1680 | UserWarning if , , and/or are provided. 1681 | UserWarning if and (, 1682 | , and/or ) are provided. 1683 | Returns: . 1684 | """ 1685 | def present(v): 1686 | return v is not _absent 1687 | 1688 | if present(scheme) and present(origin): 1689 | s = ('Possible parameter overlap: and . 
See ' 1690 | 'furl.set() documentation for more details.') 1691 | warnings.warn(s, UserWarning) 1692 | provided = [ 1693 | present(netloc), present(origin), present(host) or present(port)] 1694 | if sum(provided) >= 2: 1695 | s = ('Possible parameter overlap: , and/or ' 1696 | '( and/or ) provided. See furl.set() ' 1697 | 'documentation for more details.') 1698 | warnings.warn(s, UserWarning) 1699 | if sum(present(p) for p in [args, query, query_params]) >= 2: 1700 | s = ('Possible parameter overlap: , , and/or ' 1701 | ' provided. See furl.set() documentation for ' 1702 | 'more details.') 1703 | warnings.warn(s, UserWarning) 1704 | provided = [fragment_path, fragment_args, fragment_separator] 1705 | if present(fragment) and any(present(p) for p in provided): 1706 | s = ('Possible parameter overlap: and ' 1707 | '(and/or ) or ' 1708 | 'and provided. See furl.set() ' 1709 | 'documentation for more details.') 1710 | warnings.warn(s, UserWarning) 1711 | 1712 | # Guard against side effects on exception. 1713 | original_url = self.url 1714 | try: 1715 | if username is not _absent: 1716 | self.username = username 1717 | if password is not _absent: 1718 | self.password = password 1719 | if netloc is not _absent: 1720 | # Raises ValueError on invalid port or malformed IP. 1721 | self.netloc = netloc 1722 | if origin is not _absent: 1723 | # Raises ValueError on invalid port or malformed IP. 1724 | self.origin = origin 1725 | if scheme is not _absent: 1726 | self.scheme = scheme 1727 | if host is not _absent: 1728 | # Raises ValueError on invalid host or malformed IP. 1729 | self.host = host 1730 | if port is not _absent: 1731 | self.port = port # Raises ValueError on invalid port. 
1732 | 1733 | if path is not _absent: 1734 | self.path.load(path) 1735 | if query is not _absent: 1736 | self.query.load(query) 1737 | if args is not _absent: 1738 | self.query.load(args) 1739 | if query_params is not _absent: 1740 | self.query.load(query_params) 1741 | if fragment is not _absent: 1742 | self.fragment.load(fragment) 1743 | if fragment_path is not _absent: 1744 | self.fragment.path.load(fragment_path) 1745 | if fragment_args is not _absent: 1746 | self.fragment.query.load(fragment_args) 1747 | if fragment_separator is not _absent: 1748 | self.fragment.separator = fragment_separator 1749 | except Exception: 1750 | self.load(original_url) 1751 | raise 1752 | 1753 | return self 1754 | 1755 | def remove(self, args=_absent, path=_absent, fragment=_absent, 1756 | query=_absent, scheme=False, username=False, password=False, 1757 | host=False, port=False, netloc=False, origin=False, 1758 | query_params=_absent, fragment_path=_absent, 1759 | fragment_args=_absent): 1760 | """ 1761 | Remove components of this furl's URL and return this furl 1762 | instance, . 1763 | 1764 | Parameters: 1765 | args: Shortcut for query_params. 1766 | path: A list of path segments to remove from the end of the 1767 | existing path segments list, or a path string to remove from 1768 | the end of the existing path string, or True to remove the 1769 | path portion of the URL entirely. 1770 | query: A list of query keys to remove from the query, if they 1771 | exist, or True to remove the query portion of the URL 1772 | entirely. 1773 | query_params: A list of query keys to remove from the query, 1774 | if they exist. 1775 | port: If True, remove the port from the network location 1776 | string, if it exists. 1777 | fragment: If True, remove the fragment portion of the URL 1778 | entirely. 1779 | fragment_path: A list of path segments to remove from the end 1780 | of the fragment's path segments or a path string to remove 1781 | from the end of the fragment's path string. 
1782 | fragment_args: A list of query keys to remove from the 1783 | fragment's query, if they exist. 1784 | username: If True, remove the username, if it exists. 1785 | password: If True, remove the password, if it exists. 1786 | Returns: . 1787 | """ 1788 | if scheme is True: 1789 | self.scheme = None 1790 | if username is True: 1791 | self.username = None 1792 | if password is True: 1793 | self.password = None 1794 | if host is True: 1795 | self.host = None 1796 | if port is True: 1797 | self.port = None 1798 | if netloc is True: 1799 | self.netloc = None 1800 | if origin is True: 1801 | self.origin = None 1802 | 1803 | if path is not _absent: 1804 | self.path.remove(path) 1805 | 1806 | if args is not _absent: 1807 | self.query.remove(args) 1808 | if query is not _absent: 1809 | self.query.remove(query) 1810 | if query_params is not _absent: 1811 | self.query.remove(query_params) 1812 | 1813 | if fragment is not _absent: 1814 | self.fragment.remove(fragment) 1815 | if fragment_path is not _absent: 1816 | self.fragment.path.remove(fragment_path) 1817 | if fragment_args is not _absent: 1818 | self.fragment.query.remove(fragment_args) 1819 | 1820 | return self 1821 | 1822 | def tostr(self, query_delimiter='&', query_quote_plus=True, 1823 | query_dont_quote=''): 1824 | encoded_query = self.query.encode( 1825 | query_delimiter, query_quote_plus, query_dont_quote) 1826 | url = urllib.parse.urlunsplit(( 1827 | self.scheme or '', # Must be text type in Python 3. 1828 | self.netloc, 1829 | str(self.path), 1830 | encoded_query, 1831 | str(self.fragment), 1832 | )) 1833 | 1834 | # Differentiate between '' and None values for scheme and netloc. 
1835 | if self.scheme == '': 1836 | url = ':' + url 1837 | 1838 | if self.netloc == '': 1839 | if self.scheme is None: 1840 | url = '//' + url 1841 | elif strip_scheme(url) == '': 1842 | url = url + '//' 1843 | 1844 | return str(url) 1845 | 1846 | def join(self, *urls): 1847 | for url in urls: 1848 | if not isinstance(url, six.string_types): 1849 | url = str(url) 1850 | newurl = urljoin(self.url, url) 1851 | self.load(newurl) 1852 | return self 1853 | 1854 | def copy(self): 1855 | return self.__class__(self) 1856 | 1857 | def asdict(self): 1858 | return { 1859 | 'url': self.url, 1860 | 'scheme': self.scheme, 1861 | 'username': self.username, 1862 | 'password': self.password, 1863 | 'host': self.host, 1864 | 'host_encoded': idna_encode(self.host), 1865 | 'port': self.port, 1866 | 'netloc': self.netloc, 1867 | 'origin': self.origin, 1868 | 'path': self.path.asdict(), 1869 | 'query': self.query.asdict(), 1870 | 'fragment': self.fragment.asdict(), 1871 | } 1872 | 1873 | def __truediv__(self, path): 1874 | return self.copy().add(path=path) 1875 | 1876 | def __eq__(self, other): 1877 | try: 1878 | return self.url == other.url 1879 | except AttributeError: 1880 | return None 1881 | 1882 | def __ne__(self, other): 1883 | return not self == other 1884 | 1885 | def __setattr__(self, attr, value): 1886 | if (not PathCompositionInterface.__setattr__(self, attr, value) and 1887 | not QueryCompositionInterface.__setattr__(self, attr, value) and 1888 | not FragmentCompositionInterface.__setattr__(self, attr, value)): 1889 | object.__setattr__(self, attr, value) 1890 | 1891 | def __unicode__(self): 1892 | return self.tostr() 1893 | 1894 | def __repr__(self): 1895 | return "%s('%s')" % (self.__class__.__name__, str(self)) 1896 | -------------------------------------------------------------------------------- /furl/omdict1D.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 4 | # furl - URL manipulation made 
class omdict1D(omdict):

    """
    One dimensional ordered multivalue dictionary.

    Any list of values handed to set(), __setitem__(), add(), update(),
    or updateall() is unpacked and stored as several values for the
    key, as if the matching 'list' method - setlist(), addlist(), and
    so on - had been called. For example:

      omd = omdict1D()

      omd[1] = [1,2,3]
      omd[1] != [1,2,3] # True.
      omd[1] == 1 # True.
      omd.getlist(1) == [1,2,3] # True.

      omd.add(2, [2,3,4])
      omd[2] != [2,3,4] # True.
      omd[2] == 2 # True.
      omd.getlist(2) == [2,3,4] # True.

      omd.update([(3, [3,4,5])])
      omd[3] != [3,4,5] # True.
      omd[3] == 3 # True.
      omd.getlist(3) == [3,4,5] # True.

      omd = omdict1D([(1,None),(2,None)])
      omd.updateall([(1,[1,11]), (2,[2,22])])
      omd.allitems() == [(1,1), (2,2), (1,11), (2,22)]
    """

    def add(self, key, value):
        """
        Append <value> - or, if <value> is a non-string iterable, each
        of its items - to the values of <key>. An empty list adds
        nothing. Returns: <self>.
        """
        values = value if is_iterable_but_not_string(value) else [value]

        # Only create the key's node list when there is something to
        # put in it.
        if values:
            self._map.setdefault(key, list())

        for item in values:
            self._map[key].append(self._items.append(key, item))

        return self

    def set(self, key, value):
        """Replace the values of <key>; see _set()."""
        return self._set(key, value)

    def __setitem__(self, key, value):
        """omd[key] = value; same as set(key, value)."""
        return self._set(key, value)

    def _bin_update_items(self, items, replace_at_most_one,
                          replacements, leftovers):
        """
        Override of omdict._bin_update_items() so that update() and
        updateall() treat a list of values as multiple values.

        <replacements> and <leftovers> are filled in place; nothing is
        returned.
        """
        for key, values in items:
            # Scalars, and the empty list itself, are processed as one
            # single value.
            if not (is_iterable_but_not_string(values) and values):
                values = [values]

            for value in values:
                if value == []:
                    # An empty list wipes <key>: forget any leftovers
                    # queued for it and mark its replacement as [], so
                    # that omdict.setlist() deletes <key> later on in
                    # omdict._update_updateall().
                    replacements[key] = []
                    leftovers[:] = [
                        pair for pair in leftovers if key != pair[0]]
                    continue

                if (key in self and
                        replacements.get(key, _absent) in [[], _absent]):
                    # First value seen for an existing key that has no
                    # replacement queued yet.
                    replacements[key] = [value]
                elif (key in self and not replace_at_most_one and
                      len(replacements[key]) < len(self.values(key))):
                    # Existing values of <key> remain that <value> can
                    # still replace.
                    replacements[key].append(value)
                elif replace_at_most_one:
                    replacements[key] = [value]
                else:
                    leftovers.append((key, value))

    def _set(self, key, value):
        """
        Replace every value of <key> with <value>, or with the items of
        <value> if it is a non-string iterable, via setlist(). Returns:
        <self>.
        """
        values = value if is_iterable_but_not_string(value) else [value]
        self.setlist(key, values)

        return self
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# furl - URL manipulation made simple.
#
# Ansgar Grunseid
# grunseid.com
# grunseid@gmail.com
#
# License: Build Amazing Things (Unlicense)
#

import io
import os
import sys
from os.path import dirname, join as pjoin
from setuptools import setup, find_packages, Command
# NOTE(review): setuptools has deprecated its `test` command; confirm
# this import still resolves on the setuptools versions used to build.
from setuptools.command.test import test as TestCommand


# Resolve project files relative to this script, not the current
# working directory, so the version file and the README are located
# the same way.
here = dirname(__file__)

# Package metadata (__title__, __version__, ...) is read by executing
# furl/__version__.py, so furl itself never has to be importable here.
meta = {}
with open(pjoin(here, 'furl', '__version__.py')) as f:
    exec(f.read(), meta)


# Decode the README explicitly as UTF-8 so that the build doesn't
# depend on the locale's default encoding.
readmePath = pjoin(here, 'README.md')
with io.open(readmePath, encoding='utf-8') as f:
    readmeContent = f.read()


class Publish(Command):
    """Publish to PyPI with twine."""
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        # Don't upload anything if building the distributions failed.
        if os.system('python setup.py sdist bdist_wheel') != 0:
            sys.exit(1)

        sdist = 'dist/furl-%s.tar.gz' % meta['__version__']
        wheel = 'dist/furl-%s-py2.py3-none-any.whl' % meta['__version__']
        rc = os.system('twine upload "%s" "%s"' % (sdist, wheel))

        # On POSIX, os.system() returns a wait status (256 for exit
        # code 1), which sys.exit() would truncate to 0 and report as
        # success. Collapse it to a plain 0 or 1 instead.
        sys.exit(0 if rc == 0 else 1)


class RunTests(TestCommand):
    """
    Run the unit tests.

    To test all supported Python versions (as specified in tox.ini) in
    parallel, run

      $ tox -p

    By default, `python setup.py test` fails if tests/ isn't a Python
    module; i.e. if the tests/ directory doesn't contain an __init__.py
    file). But the tests/ directory shouldn't contain an __init__.py
    file and tests/ shouldn't be a Python module. See

      http://doc.pytest.org/en/latest/goodpractices.html

    Running the unit tests manually here enables `python setup.py test`
    without tests/ being a Python module.
    """
    def run_tests(self):
        from unittest import TestLoader, TextTestRunner
        tests_dir = pjoin(dirname(__file__), 'tests/')
        suite = TestLoader().discover(tests_dir)
        result = TextTestRunner().run(suite)
        sys.exit(0 if result.wasSuccessful() else -1)


setup(
    name=meta['__title__'],
    license=meta['__license__'],
    version=meta['__version__'],
    author=meta['__author__'],
    author_email=meta['__contact__'],
    url=meta['__url__'],
    description=meta['__description__'],
    long_description=readmeContent,
    long_description_content_type='text/markdown',
    packages=find_packages(),
    include_package_data=True,
    platforms=['any'],
    classifiers=[
        'License :: Public Domain',
        'Natural Language :: English',
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Libraries',
        'Development Status :: 5 - Production/Stable',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: 3.13',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Programming Language :: Python :: Implementation :: CPython',
    ],
    tests_require=[
        'flake8',
        'six>=1.8.0',
    ],
    install_requires=[
        'six>=1.8.0',
        'orderedmultidict>=1.0.1',
    ],
    cmdclass={
        'test': RunTests,
        'publish': Publish,
    },
)
class TestOmdict1D(unittest.TestCase):
    """Unit tests for omdict1D's list-unpacking add/set/update methods."""

    def setUp(self):
        # Shared fixtures: one fixed key, a mix of key types, scalar
        # values, and value lists (including the empty list).
        self.key = 'sup'
        self.keys = [1, 2, -1, 'a', None, 0.9]
        self.values = [1, 2, None]
        self.valuelists = [[], [1], [1, 2, 3], [None, None, 1]]

    def test_update_updateall(self):
        """
        Compare omdict1D.update() and updateall() against a plain
        omdict fed the same updates, then check a table of updates
        that place an empty list at different positions.
        """
        data, omd1, omd2 = omdict(), omdict1D(), omdict1D()

        # All permutations of (self.keys, self.values) and (self.keys,
        # self.valuelists).
        allitems = chain(product(self.keys, self.values),
                         product(self.keys, self.valuelists))

        # Intended: all updates of length one through four items.
        #
        # NOTE(review): <allitems> is a one-shot chain() iterator
        # shared by all four permutations() calls. The first call
        # drains it, so the length two, three, and four iterators get
        # an empty pool and yield nothing; only single-item updates
        # are exercised below. Materializing <allitems> into a list
        # would enable them, but check the runtime first (42 items
        # taken 4 at a time) and whether the assertions still hold,
        # since <data> is filled with update() while <omd2> is filled
        # with updateall().
        iterators = [permutations(allitems, 1),
                     permutations(allitems, 2),
                     permutations(allitems, 3),
                     permutations(allitems, 4),
                     ]

        for iterator in iterators:
            for update in iterator:
                data.update(update)
                omd1.update(update)
                omd2.updateall(update)
                # update(): the first value omdict1D holds for a key
                # is the last item of the list the plain omdict holds.
                for key in six.iterkeys(omd1):
                    if isinstance(data[key], list):
                        assert omd1[key] == data[key][-1]
                    else:
                        assert omd1[key] == data[key]
                # updateall(): omdict1D's values equal the plain
                # omdict's values with every list flattened one level.
                for key in six.iterkeys(omd2):
                    data_values_unpacked = []
                    for value in data.getlist(key):
                        if isinstance(value, list):
                            data_values_unpacked.extend(value)
                        else:
                            data_values_unpacked.append(value)

                    assert omd2.getlist(key) == data_values_unpacked

        # Test different empty list value locations. Each entry is
        # (initial items, update, expected allitems()).
        update_tests = [([(1, None), (2, None)],
                         [(1, [1, 11]), (2, [2, 22])],
                         [(1, 11), (2, 22)]),
                        ([(1, None), (2, None)],
                         [(1, []), (1, 1), (1, 11)],
                         [(1, 11), (2, None)]),
                        ([(1, None), (2, None)],
                         [(1, 1), (1, []), (1, 11)],
                         [(1, 11), (2, None)]),
                        ([(1, None), (2, None)],
                         [(1, 1), (1, 11), (1, [])],
                         [(2, None)]),
                        ]
        for init, update, result in update_tests:
            omd = omdict1D(init)
            omd.update(update)
            assert omd.allitems() == result

        # Same layout as <update_tests>, for updateall().
        updateall_tests = [([(1, None), (2, None)],
                            [(1, [1, 11]), (2, [2, 22])],
                            [(1, 1), (2, 2), (1, 11), (2, 22)]),
                           ([(1, None), (2, None)],
                            [(1, []), (1, 1), (1, 11)],
                            [(1, 1), (2, None), (1, 11)]),
                           ([(1, None), (2, None)],
                            [(1, 1), (1, []), (1, 11)],
                            [(1, 11), (2, None)]),
                           ([(1, None), (2, None)],
                            [(1, 1), (1, 11), (1, [])],
                            [(2, None)]),
                           ]
        for init, update, result in updateall_tests:
            omd = omdict1D(init)
            omd.updateall(update)
            assert omd.allitems() == result

    def test_add(self):
        """add() appends lists item by item and scalars as one value."""
        # <runningsum> accumulates every value added so far, in order.
        runningsum = []
        omd = omdict1D()
        for valuelist in self.valuelists:
            runningsum += valuelist
            if valuelist:
                assert omd.add(self.key, valuelist) == omd
                assert omd[self.key] == omd.get(self.key) == runningsum[0]
                assert omd.getlist(self.key) == runningsum
            else:
                assert self.key not in omd

        runningsum = []
        omd = omdict1D()
        for value in self.values:
            runningsum += [value]
            assert omd.add(self.key, value) == omd
            assert omd[self.key] == omd.get(self.key) == runningsum[0]
            assert omd.getlist(self.key) == runningsum

        # Empty list of values adds nothing.
        assert _unique not in omd
        assert omd.add(_unique, []) == omd
        assert _unique not in omd

    def test_set(self):
        """omd[key] = list, set(), and setlist() all agree."""
        omd1, omd2, omd3 = omdict1D(), omdict1D(), omdict1D()

        for valuelist in self.valuelists:
            omd1[self.key] = valuelist
            assert omd2.set(self.key, valuelist) == omd2
            assert omd3.setlist(self.key, valuelist) == omd3
            assert omd1 == omd2 == omd3 and omd1.getlist(self.key) == valuelist

        # Empty list of values deletes that key and all its values,
        # equivalent to del omd[somekey].
        omd = omdict1D()
        assert _unique not in omd
        omd.set(_unique, [])
        assert omd == omd
        assert _unique not in omd

        omd.set(_unique, [1, 2, 3])
        assert omd.getlist(_unique) == [1, 2, 3]
        omd.set(_unique, [])
        assert _unique not in omd

    def test_setitem(self):
        """omd[key] = value stores lists as multiple values."""
        omd = omdict1D()
        for value, valuelist in six.moves.zip(self.values, self.valuelists):
            if valuelist:
                omd[self.key] = valuelist
                assert omd[self.key] == omd.get(self.key) == valuelist[0]
                assert omd.getlist(self.key) == valuelist
            else:
                assert self.key not in omd

            omd[self.key] = value
            assert omd[self.key] == omd.get(self.key) == value
            assert omd.getlist(self.key) == [value]

        # Empty list of values deletes that key and all its values,
        # equivalent to del omd[somekey].
        omd = omdict1D()
        assert _unique not in omd
        omd[_unique] = []
        assert omd == omd
        assert _unique not in omd

        omd[_unique] = [1, 2, 3]
        assert omd.getlist(_unique) == [1, 2, 3]
        omd[_unique] = []
        assert _unique not in omd