├── tests ├── stubs │ └── vcr.cassette.pyi ├── conftest.py ├── test_parser.py ├── test_cli.py ├── test_matcher.py ├── test_flickr_url_parser.py └── fixtures │ └── cassettes │ └── test_it_doesnt_parse_a_broken_guest_pass_url.yml ├── .gitignore ├── .gitattributes ├── src └── flickr_url_parser │ ├── py.typed │ ├── exceptions.py │ ├── matcher.py │ ├── __init__.py │ ├── base58.py │ ├── cli.py │ ├── types.py │ └── parser.py ├── dev_requirements.in ├── .github ├── dependabot.yml └── workflows │ └── test.yml ├── LICENSE-MIT ├── CONTRIBUTING.md ├── pyproject.toml ├── README.md ├── dev_requirements.txt ├── CHANGELOG.md └── LICENSE-APACHE /tests/stubs/vcr.cassette.pyi: -------------------------------------------------------------------------------- 1 | class Cassette: ... 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | *.egg-info 3 | *.pyc 4 | .coverage 5 | dist 6 | build 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | dev_requirements.txt linguist-generated=true 2 | 3 | tests/fixtures/cassettes/*.yml linguist-generated=true 4 | -------------------------------------------------------------------------------- /src/flickr_url_parser/py.typed: -------------------------------------------------------------------------------- 1 | # Instruct type checkers to look for inline type annotations in this package. 2 | # See PEP 561. 3 | -------------------------------------------------------------------------------- /dev_requirements.in: -------------------------------------------------------------------------------- 1 | file:. 
2 | 3 | build 4 | interrogate 5 | mypy 6 | ruff 7 | pytest-cov 8 | twine 9 | 10 | silver-nitrate[cassettes]>=1.5.0 11 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Re-export fixtures from silver-nitrate. 3 | """ 4 | 5 | from nitrate.cassettes import cassette_name, vcr_cassette 6 | 7 | __all__ = ["cassette_name", "vcr_cassette"] 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | time: "09:00" 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | time: "09:00" 13 | -------------------------------------------------------------------------------- /src/flickr_url_parser/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Exceptions that are thrown when we're unable to parse a URL as 3 | a Flickr URL. 4 | """ 5 | 6 | 7 | class NotAFlickrUrl(Exception): 8 | """ 9 | Raised when somebody tries to parse a URL which isn't from Flickr. 10 | """ 11 | 12 | pass 13 | 14 | 15 | class UnrecognisedUrl(Exception): 16 | """ 17 | Raised when somebody tries to parse a URL on Flickr, but we 18 | can't work out what photos are there. 19 | """ 20 | 21 | pass 22 | -------------------------------------------------------------------------------- /src/flickr_url_parser/matcher.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for finding Flickr URLs in a block of text. 3 | 4 | TODO: ``matcher`` isn't a good name for this file/function. Find a better 5 | name for it. 
6 | """ 7 | 8 | import re 9 | 10 | 11 | FLICKR_URL_RE_MATCH = re.compile( 12 | r"(?:https?://)" 13 | r"?(?:www\.)?" 14 | r"(?:live\.static\.?)?" 15 | r"(?:farm[0-9]+\.static\.?)?" 16 | r"(?:c[0-9]+\.static\.?)?" 17 | r"(?:static\.)?" 18 | r"(?:photos[0-9]+\.)?" 19 | r"flickr\.com[0-9A-Za-z@_\-/\.\?\&=]*" 20 | ) 21 | 22 | 23 | def find_flickr_urls_in_text(text: str) -> list[str]: 24 | """ 25 | Returns a list of Flickr URLs in a block of text (if any). 26 | """ 27 | return [url.rstrip(".") for url in FLICKR_URL_RE_MATCH.findall(text)] 28 | -------------------------------------------------------------------------------- /src/flickr_url_parser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | ``flickr_url_parser`` is a library for parsing Flickr URLs. 3 | 4 | You enter a Flickr URL, and it tells you what it points to -- 5 | a single photo, an album, a gallery, and so on. 6 | """ 7 | 8 | from .exceptions import NotAFlickrUrl, UnrecognisedUrl 9 | from .matcher import find_flickr_urls_in_text 10 | from .parser import ( 11 | looks_like_flickr_photo_id, 12 | looks_like_flickr_user_id, 13 | parse_flickr_url, 14 | ) 15 | from .types import ParseResult 16 | 17 | __version__ = "1.12.0" 18 | 19 | 20 | __all__ = [ 21 | "looks_like_flickr_photo_id", 22 | "looks_like_flickr_user_id", 23 | "find_flickr_urls_in_text", 24 | "parse_flickr_url", 25 | "UnrecognisedUrl", 26 | "NotAFlickrUrl", 27 | "ParseResult", 28 | ] 29 | -------------------------------------------------------------------------------- /src/flickr_url_parser/base58.py: -------------------------------------------------------------------------------- 1 | """ 2 | A couple of functions related to base58 encoding, which is used for Flickr's short URLs. 
3 | """ 4 | 5 | BASE58_ALPHABET = "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ" 6 | 7 | 8 | def is_base58(num: str) -> bool: 9 | """ 10 | Returns True if a string looks like a base58-encoded value, false otherwise. 11 | """ 12 | return all(digit in BASE58_ALPHABET for digit in num) 13 | 14 | 15 | def base58_decode(num: str) -> str: 16 | """ 17 | Do a base58 decoding of a string, as used in flic.kr-style photo URLs. 18 | """ 19 | # This is a Python translation of some PHP code posted by Flickr user kellan 20 | # at https://www.flickr.com/groups/51035612836@N01/discuss/72157616713786392/ 21 | decoded = 0 22 | multi = 1 23 | 24 | while num: 25 | digit = num[-1] 26 | decoded += multi * BASE58_ALPHABET.index(digit) 27 | multi = multi * len(BASE58_ALPHABET) 28 | num = num[:-1] 29 | 30 | return str(decoded) 31 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | test: 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.12" 18 | - "3.13" 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | cache: pip 30 | 31 | - name: Install dependencies 32 | run: pip install -r dev_requirements.txt 33 | 34 | - name: Check formatting 35 | run: | 36 | ruff check . 37 | ruff format --check . 
38 | 39 | - name: Check docstrings 40 | run: interrogate -vv 41 | 42 | - name: Check types 43 | run: mypy src tests 44 | 45 | - name: Run tests 46 | run: | 47 | coverage run -m pytest tests 48 | coverage report 49 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 2023 The Flickr Foundation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Development 2 | 3 | You can set up a local development environment by cloning the repo and installing dependencies: 4 | 5 | ```console 6 | $ git clone https://github.com/Flickr-Foundation/flickr-url-parser.git 7 | $ cd flickr-url-parser 8 | $ python3 -m venv .venv 9 | $ source .venv/bin/activate 10 | $ pip install -e . 11 | ``` 12 | 13 | If you want to run tests, install the dev dependencies and run py.test: 14 | 15 | ```console 16 | $ source .venv/bin/activate 17 | $ pip install -r dev_requirements.txt 18 | $ coverage run -m pytest tests 19 | $ coverage report 20 | ``` 21 | 22 | To make changes to the library: 23 | 24 | 1. Create a new branch 25 | 2. Push your changes to GitHub 26 | 3. Open a pull request 27 | 4. Fix any issues flagged by GitHub Actions (including tests, code linting, and type checking) 28 | 5. Ask somebody to review your change 29 | 6. Merge it! 30 | 31 | To create a new version on PyPI: 32 | 33 | 1. Update the version in `src/flickr_url_parser/__init__.py` 34 | 2. Add release notes in `CHANGELOG.md` and push a new tag to GitHub 35 | 3. Deploy the release using twine: 36 | 37 | ```console 38 | $ python3 -m build 39 | $ python3 -m twine upload dist/* --username=__token__ 40 | ``` 41 | 42 | You will need [a PyPI API token](https://pypi.org/help/#apitoken) to publish packages. 43 | This token is stored in 1Password. 44 | 45 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for ``flickr_url_parser.parser``. 
3 | """ 4 | 5 | import pytest 6 | 7 | from flickr_url_parser import looks_like_flickr_photo_id, looks_like_flickr_user_id 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "text", 12 | [ 13 | # These are three real photo IDs 14 | "32812033543", 15 | "4895431370", 16 | "5240741057", 17 | # These aren't real photo IDs, but they look like they might be 18 | "1", 19 | "123", 20 | "12345678901234567890", 21 | ], 22 | ) 23 | def test_looks_like_flickr_photo_id(text: str) -> None: 24 | """ 25 | Any string made of digits 0-9 looks like a Flickr photo ID. 26 | """ 27 | assert looks_like_flickr_photo_id(text) 28 | 29 | 30 | @pytest.mark.parametrize("text", ["-1", "½", "cat.jpg", ""]) 31 | def test_doesnt_look_like_a_flickr_photo_id(text: str) -> None: 32 | """ 33 | Any string not made of digits 0-9 doesn't look like a Flickr photo ID. 34 | """ 35 | assert not looks_like_flickr_photo_id(text) 36 | 37 | 38 | @pytest.mark.parametrize("text", ["47265398@N04"]) 39 | def test_looks_like_flickr_user_id(text: str) -> None: 40 | """ 41 | Real Flickr user IDs look like Flickr user IDs. 42 | """ 43 | assert looks_like_flickr_user_id(text) 44 | 45 | 46 | @pytest.mark.parametrize("text", ["123", "blueminds", ""]) 47 | def test_doesnt_look_like_flickr_user_id(text: str) -> None: 48 | """ 49 | These strings don't look like Flickr user IDs. 50 | """ 51 | assert not looks_like_flickr_user_id(text) 52 | -------------------------------------------------------------------------------- /src/flickr_url_parser/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic CLI for ``flickr_url_parser``. 3 | """ 4 | 5 | import json 6 | import sys 7 | import textwrap 8 | 9 | from . import parse_flickr_url, __version__ 10 | 11 | 12 | def run_cli(argv: list[str]) -> int: 13 | """ 14 | Parse the command-line arguments and return an exit code. 15 | 16 | Possible uses: 17 | 18 | 1. Pass a URL as a command-line argument, e.g. 
19 | 20 | flickr_url_parser https://flickr.com 21 | 22 | 2. Pass the ``--help`` flag to get help text: 23 | 24 | flickr_url_parser --help 25 | 26 | 3. Pass the ``--version`` flag to get the version number: 27 | 28 | flickr_url_parser --version 29 | 30 | """ 31 | # Because this interface is so simple, I just implemented it 32 | # manually. If we want to make it any more complicated, we should 33 | # use a proper library for parsing command-line arguments, 34 | # e.g. ``argparse`` or ``click`` 35 | try: 36 | single_arg = argv[1] 37 | except IndexError: 38 | print(f"Usage: {__file__} ", file=sys.stderr) 39 | return 1 40 | 41 | if single_arg == "--help": 42 | print(textwrap.dedent(parse_flickr_url.__doc__).strip()) # type: ignore[arg-type] 43 | return 0 44 | elif single_arg == "--version": 45 | print(f"flickr_url_parser {__version__}") 46 | return 0 47 | else: 48 | print(json.dumps(parse_flickr_url(single_arg))) 49 | return 0 50 | 51 | 52 | def main() -> None: # pragma: no cover 53 | """ 54 | Actually run the CLI and exit the program with the returned exit code. 
55 | """ 56 | rc = run_cli(argv=sys.argv) 57 | sys.exit(rc) 58 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools >= 65", 4 | ] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [project] 8 | name = "flickr-url-parser" 9 | description = "Enter a Flickr URL, and find out what sort of URL it is (single photo, album, gallery, etc.)" 10 | readme = "README.md" 11 | keywords = ["flickr"] 12 | authors = [ 13 | {name = "Flickr Foundation", email = "hello@flickr.org"}, 14 | ] 15 | maintainers = [ 16 | {name = "Alex Chan", email="alex@flickr.org"}, 17 | ] 18 | classifiers = [ 19 | "Development Status :: 5 - Production/Stable", 20 | "Programming Language :: Python :: 3.12", 21 | ] 22 | license = "MIT AND (Apache-2.0)" 23 | requires-python = ">=3.12" 24 | dependencies = [ 25 | "httpx", 26 | "hyperlink", 27 | ] 28 | dynamic = ["version"] 29 | 30 | [project.urls] 31 | "Homepage" = "https://github.com/Flickr-Foundation/flickr-url-parser" 32 | "Changelog" = "https://github.com/Flickr-Foundation/flickr-url-parser/blob/main/CHANGELOG.md" 33 | 34 | [project.scripts] 35 | flickr_url_parser = "flickr_url_parser.cli:main" 36 | 37 | [tool.setuptools.dynamic] 38 | version = {attr = "flickr_url_parser.__version__"} 39 | 40 | [tool.setuptools.packages.find] 41 | where = ["src"] 42 | 43 | [tool.setuptools.package-data] 44 | flickr_url_parser = ["py.typed"] 45 | 46 | [tool.coverage.run] 47 | branch = true 48 | source = [ 49 | "flickr_url_parser", 50 | "tests", 51 | ] 52 | 53 | [tool.coverage.report] 54 | show_missing = true 55 | skip_covered = true 56 | fail_under = 100 57 | 58 | [tool.pytest.ini_options] 59 | filterwarnings = ["error"] 60 | 61 | [tool.mypy] 62 | mypy_path = "src" 63 | strict = true 64 | 65 | [tool.interrogate] 66 | fail_under = 100 67 | omit-covered-files = true 68 | 
-------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the flickr_url_parser CLI. 3 | """ 4 | 5 | import json 6 | 7 | from pytest import CaptureFixture 8 | 9 | from flickr_url_parser.cli import run_cli 10 | 11 | 12 | def test_run_cli(capsys: CaptureFixture[str]) -> None: 13 | """ 14 | Running ``flickr_url_parser URL`` prints the parsed URL as JSON. 15 | """ 16 | rc = run_cli( 17 | argv=[ 18 | "flickr_url_parser", 19 | "https://www.flickr.com/photos/coast_guard/32812033543", 20 | ] 21 | ) 22 | 23 | assert rc == 0 24 | 25 | captured = capsys.readouterr() 26 | assert json.loads(captured.out) == { 27 | "type": "single_photo", 28 | "photo_id": "32812033543", 29 | "user_url": "https://www.flickr.com/photos/coast_guard/", 30 | "user_id": None, 31 | } 32 | assert captured.err == "" 33 | 34 | 35 | def test_run_cli_shows_help(capsys: CaptureFixture[str]) -> None: 36 | """ 37 | Running ``flickr_url_parser --help`` prints the help message. 38 | """ 39 | rc = run_cli(argv=["flickr_url_parser", "--help"]) 40 | 41 | assert rc == 0 42 | 43 | captured = capsys.readouterr() 44 | assert captured.out.startswith("Parse a Flickr URL and return some key information") 45 | assert captured.err == "" 46 | 47 | 48 | def test_run_cli_shows_version(capsys: CaptureFixture[str]) -> None: 49 | """ 50 | Running ``flickr_url_parser --version`` prints the version number. 51 | """ 52 | rc = run_cli(argv=["flickr_url_parser", "--version"]) 53 | 54 | assert rc == 0 55 | 56 | captured = capsys.readouterr() 57 | assert captured.out.startswith("flickr_url_parser 1.") 58 | assert captured.err == "" 59 | 60 | 61 | def test_run_cli_throws_err(capsys: CaptureFixture[str]) -> None: 62 | """ 63 | Running ``flickr_url_parser`` without a URL is an error. 
64 | """ 65 | rc = run_cli(argv=["flickr_url_parser"]) 66 | 67 | assert rc == 1 68 | 69 | captured = capsys.readouterr() 70 | assert captured.out.startswith("") 71 | assert captured.err.startswith("Usage:") 72 | -------------------------------------------------------------------------------- /tests/test_matcher.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for ``flickr_url_parser.matcher``. 3 | """ 4 | 5 | import pytest 6 | 7 | from flickr_url_parser import find_flickr_urls_in_text 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "url", 12 | [ 13 | "https://www.flickr.com", 14 | "https://www.flickr.com/account/email", 15 | "https://www.flickr.com/groups/slovenia/discuss/", 16 | "https://live.staticflickr.com/7372/help.jpg", 17 | "http://flickr.com/photos/coast_guard/32812033543", 18 | "http://farm3.static.flickr.com/2060/2264610973_3989a4627f_o.jpg", 19 | "https://www.flickr.com/photo_zoom.gne?id=196155401&size=m", 20 | "http://photos4.flickr.com/4891733_cec6cd1c66_b_d.jpg", 21 | "https://farm5.static.flickr.com/4586/37767087695_bb4ecff5f4_o.jpg", 22 | # 23 | # From https://commons.wikimedia.org/wiki/File:Adriaen_Brouwer_-_The_slaughter_feast.jpg 24 | # Retrieved 12 December 2023 25 | "https://farm5.staticflickr.com/4586/37767087695_bb4ecff5f4_o.jpg", 26 | # 27 | # From https://commons.wikimedia.org/wiki/File:Maradona_Soccer_Aid.jpg 28 | # Retrieved 12 December 2023 29 | "http://static.flickr.com/63/155697786_0125559b4e.jpg", 30 | # 31 | # From https://commons.wikimedia.org/wiki/File:IgnazioDanti.jpg 32 | # Retrieved 12 December 2023 33 | "https://c8.staticflickr.com/6/5159/14288803431_7cf094b085_b.jpg", 34 | ], 35 | ) 36 | def test_find_flickr_urls_in_text(url: str) -> None: 37 | """ 38 | Find a URL in the middle of some text. 
39 | """ 40 | text = f"aaa {url} bbb" 41 | assert find_flickr_urls_in_text(text) == [url] 42 | 43 | 44 | def test_it_strips_trailing_dots() -> None: 45 | """ 46 | Find a URL in a sentence with a trailing period. 47 | 48 | This is based on text taken from 49 | https://commons.wikimedia.org/wiki/File:HMAS_AE2_Sydney.jpg 50 | Retrieved 12 December 2023 51 | """ 52 | text = "File source is https://www.flickr.com/photos/41311545@N05/4302722415." 53 | 54 | assert find_flickr_urls_in_text(text) == [ 55 | "https://www.flickr.com/photos/41311545@N05/4302722415" 56 | ] 57 | -------------------------------------------------------------------------------- /src/flickr_url_parser/types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Different types of parsed URL which might be returned by ``flickr_url_parser``. 3 | """ 4 | 5 | import typing 6 | 7 | 8 | class Homepage(typing.TypedDict): 9 | """ 10 | The Flickr.com homepage. 11 | """ 12 | 13 | type: typing.Literal["homepage"] 14 | 15 | 16 | class SinglePhoto(typing.TypedDict): 17 | """ 18 | A single photo on Flickr.com. 19 | """ 20 | 21 | type: typing.Literal["single_photo"] 22 | photo_id: str 23 | user_url: str | None 24 | user_id: str | None 25 | 26 | 27 | def anonymous_single_photo(photo_id: str) -> SinglePhoto: 28 | """ 29 | A single photo where only the photo ID is known, and nothing about 30 | the photo's owner. 31 | """ 32 | return { 33 | "type": "single_photo", 34 | "photo_id": photo_id, 35 | "user_url": None, 36 | "user_id": None, 37 | } 38 | 39 | 40 | class Album(typing.TypedDict): 41 | """ 42 | An album on Flickr.com. 43 | 44 | An album is a collection of your own photos. It can only contain 45 | photos that you uploaded. 46 | 47 | It's possible to paginate through large albums. 48 | """ 49 | 50 | type: typing.Literal["album"] 51 | user_url: str 52 | album_id: str 53 | page: int 54 | 55 | 56 | class User(typing.TypedDict): 57 | """ 58 | A user's profile on Flickr.com. 
59 | 60 | If you're looking at a user's photostream, it can be paginated. 61 | The ``page`` parameter is only returned if you're in this view. 62 | """ 63 | 64 | type: typing.Literal["user"] 65 | page: int 66 | user_url: str 67 | user_id: str | None 68 | 69 | 70 | class Group(typing.TypedDict): 71 | """ 72 | A group on Flickr.com. 73 | 74 | A group is a collection of users who shared a common interest or focus, 75 | who put their photos in a shared "pool". The pool may be paginated. 76 | """ 77 | 78 | type: typing.Literal["group"] 79 | group_url: str 80 | page: int 81 | 82 | 83 | class Gallery(typing.TypedDict): 84 | """ 85 | A gallery on Flickr.com. 86 | 87 | A gallery is a collection of other people's photos. It can only 88 | contain photos uploaded by other people. 89 | 90 | It's possible to paginate through large galleries. 91 | """ 92 | 93 | type: typing.Literal["gallery"] 94 | gallery_id: str 95 | page: int 96 | 97 | 98 | class Tag(typing.TypedDict): 99 | """ 100 | A list of photos with a tag on Flickr.com. 101 | 102 | It's possible to paginate through popular tags. 103 | """ 104 | 105 | type: typing.Literal["tag"] 106 | tag: str 107 | page: int 108 | 109 | 110 | ParseResult = Homepage | SinglePhoto | Album | User | Group | Gallery | Tag 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # flickr-url-parser 2 | 3 | This is a library for parsing Flickr URLs. 4 | You enter a Flickr URL, and it tells you what it points to – a single photo, an album, a gallery, and so on. 
5 | 6 | Examples: 7 | 8 | ```console 9 | $ flickr_url_parser "https://www.flickr.com/photos/sdasmarchives/50567413447" 10 | {"type": "single_photo", "photo_id": "50567413447"} 11 | 12 | $ flickr_url_parser "https://www.flickr.com/photos/aljazeeraenglish/albums/72157626164453131" 13 | {"type": "album", "user_url": "https://www.flickr.com/photos/aljazeeraenglish", "album_id": "72157626164453131", "page": 1} 14 | 15 | $ flickr_url_parser "https://www.flickr.com/photos/blueminds/page3" 16 | {"type": "user", "user_url": "https://www.flickr.com/photos/blueminds"} 17 | ``` 18 | 19 | ## Motivation 20 | 21 | There's a lot of variety in Flickr URLs, even among URLs that point to the same thing. 22 | For example, all three of these URLs point to the same photo page: 23 | 24 | ``` 25 | https://www.flickr.com/photos/sdasmarchives/50567413447 26 | http://flickr.com/photos/49487266@N07/50567413447 27 | https://www.flickr.com/photo.gne?id=50567413447 28 | ``` 29 | 30 | Dealing with all these variants can be tricky – this library aims to simplify that. 31 | We use it for [Flinumeratr], [Flickypedia], and other [Flickr Foundation] projects. 32 | 33 | [Flinumeratr]: https://www.flickr.org/tools/flinumeratr/ 34 | [Flickypedia]: https://www.flickr.org/tools/flickypedia/ 35 | [Flickr Foundation]: https://www.flickr.org/ 36 | 37 | ## Usage 38 | 39 | There are two ways to use flickr_url_parser: 40 | 41 | 1. **As a command-line tool.** 42 | Run `flickr_url_parser`, passing the Flickr URL as a single argument: 43 | 44 | ```console 45 | $ flickr_url_parser "https://www.flickr.com/photos/sdasmarchives/50567413447" 46 | {"type": "single_photo", "photo_id": "50567413447"} 47 | ``` 48 | 49 | The result will be printed as a JSON object. 50 | 51 | To see more information about the possible return values, run `flickr_url_parser --help`. 52 | 53 | 2. 
**As a Python library.** 54 | Import the function `parse_flickr_url` and pass the Flickr URL as a single argument: 55 | 56 | ```pycon 57 | >>> from flickr_url_parser import parse_flickr_url 58 | 59 | >>> parse_flickr_url("https://www.flickr.com/photos/sdasmarchives/50567413447") 60 | {"type": "single_photo", "photo_id": "50567413447"} 61 | ``` 62 | 63 | To see more information about the possible return values, use the [`help` function](https://docs.python.org/3/library/functions.html#help): 64 | 65 | ```pycon 66 | >>> help(parse_flickr_url) 67 | ``` 68 | 69 | Note that just because a URL can be parsed does not mean it can be *resolved* to one or more photos. 70 | The only way to know if there are photos behind the URL is to (1) try to fetch the URL or (2) use the output from the parser to ask the Flickr API for photos. 71 | 72 | ## Development 73 | 74 | If you want to make changes to the library, there are instructions in [CONTRIBUTING.md](./CONTRIBUTING.md). 75 | 76 | ## Useful reading 77 | 78 | - Photo Image URLs in the Flickr docs: <https://www.flickr.com/services/api/misc.urls.html> 79 | 80 | ## License 81 | 82 | This project is dual-licensed as Apache-2.0 and MIT. 83 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile dev_requirements.in --output-file dev_requirements.txt 3 | anyio==4.4.0 4 | # via httpx 5 | attrs==24.2.0 6 | # via interrogate 7 | build==1.2.1 8 | # via -r dev_requirements.in 9 | certifi==2024.6.2 10 | # via 11 | # httpcore 12 | # httpx 13 | # requests 14 | charset-normalizer==3.3.2 15 | # via requests 16 | click==8.1.7 17 | # via interrogate 18 | colorama==0.4.6 19 | # via interrogate 20 | coverage==7.5.3 21 | # via pytest-cov 22 | docutils==0.21.2 23 | # via readme-renderer 24 | flickr-url-parser @ file:. 
25 | # via -r dev_requirements.in 26 | h11==0.14.0 27 | # via httpcore 28 | httpcore==1.0.5 29 | # via httpx 30 | httpx==0.27.0 31 | # via flickr-url-parser 32 | hyperlink==21.0.0 33 | # via flickr-url-parser 34 | idna==3.7 35 | # via 36 | # anyio 37 | # httpx 38 | # hyperlink 39 | # requests 40 | # yarl 41 | importlib-metadata==7.1.0 42 | # via twine 43 | iniconfig==2.0.0 44 | # via pytest 45 | interrogate==1.7.0 46 | # via -r dev_requirements.in 47 | jaraco-classes==3.4.0 48 | # via keyring 49 | jaraco-context==5.3.0 50 | # via keyring 51 | jaraco-functools==4.0.1 52 | # via keyring 53 | keyring==25.2.1 54 | # via twine 55 | markdown-it-py==3.0.0 56 | # via rich 57 | mdurl==0.1.2 58 | # via markdown-it-py 59 | more-itertools==10.3.0 60 | # via 61 | # jaraco-classes 62 | # jaraco-functools 63 | multidict==6.1.0 64 | # via yarl 65 | mypy==1.10.0 66 | # via -r dev_requirements.in 67 | mypy-extensions==1.0.0 68 | # via mypy 69 | nh3==0.2.17 70 | # via readme-renderer 71 | packaging==24.1 72 | # via 73 | # build 74 | # pytest 75 | pkginfo==1.11.1 76 | # via twine 77 | pluggy==1.5.0 78 | # via pytest 79 | propcache==0.3.0 80 | # via yarl 81 | py==1.11.0 82 | # via interrogate 83 | pygments==2.18.0 84 | # via 85 | # readme-renderer 86 | # rich 87 | pyproject-hooks==1.1.0 88 | # via build 89 | pytest==8.2.2 90 | # via 91 | # pytest-cov 92 | # silver-nitrate 93 | pytest-cov==5.0.0 94 | # via -r dev_requirements.in 95 | pyyaml==6.0.2 96 | # via vcrpy 97 | readme-renderer==43.0 98 | # via twine 99 | requests==2.32.3 100 | # via 101 | # requests-toolbelt 102 | # twine 103 | requests-toolbelt==1.0.0 104 | # via twine 105 | rfc3986==2.0.0 106 | # via twine 107 | rich==13.7.1 108 | # via twine 109 | ruff==0.4.8 110 | # via -r dev_requirements.in 111 | silver-nitrate==1.5.0 112 | # via -r dev_requirements.in 113 | sniffio==1.3.1 114 | # via 115 | # anyio 116 | # httpx 117 | tabulate==0.9.0 118 | # via interrogate 119 | twine==5.1.0 120 | # via -r dev_requirements.in 121 | 
typing-extensions==4.12.2 122 | # via mypy 123 | urllib3==2.2.1 124 | # via 125 | # requests 126 | # twine 127 | # vcrpy 128 | vcrpy==7.0.0 129 | # via silver-nitrate 130 | wrapt==1.17.2 131 | # via vcrpy 132 | yarl==1.18.3 133 | # via vcrpy 134 | zipp==3.19.2 135 | # via importlib-metadata 136 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## v1.12.0 - 2025-06-30 4 | 5 | * To make the library more predictable, it doesn't make HTTP requests or follow redirects by default. 6 | If you want to parse Guest Passes or short URLs, you need to explicitly enable this with `follow_redirects=True`. 7 | 8 | This may break existing code that was parsing these URLs, but they're so rare I don't think they're a big issue in practice. 9 | 10 | ## v1.11.0 - 2025-03-04 11 | 12 | * Add support for parsing URLs from the Flickr Commons Explorer (<https://commons.flickr.org/>). 13 | 14 | ## v1.10.0 - 2024-09-04 15 | 16 | * Add a new function `looks_like_flickr_photo_id` which tells you if a particular string looks like a Flickr photo ID, or not. 17 | * Rename `is_flickr_user_id` to `looks_like_flickr_user_id` to clarify that this function is a quick heuristic, and not a guarantee that a user ID exists. 18 | 19 | ## v1.9.0 - 2024-05-22 20 | 21 | * When parsing a URL which points to a single photo, return the `user_url` and `user_id` (if they can be deduced from the URL). 22 | 23 | ## v1.8.3 - 2024-04-30 24 | 25 | * Add a trailing slash to the `user_url` returned in album URLs. This more closely matches the URL structure used on Flickr.com. 26 | 27 | ## v1.8.2 - 2024-02-02 28 | 29 | * Throw a more informative TypeError if you pass a non-string value as ``url``. 30 | 31 | ## v1.8.1 - 2024-01-02 32 | 33 | * Add support for recognising video download URLs like `/video_download.gne?id=[ID]`. 
34 | * Add support for recognising static video URLs like `https://live.staticflickr.com/video/…`. 35 | * Add support for recognising Flash player video URLs like `https://www.flickr.com/apps/video/stewart.swf?photo_id=…`. 36 | 37 | ## v1.8.0 - 2023-12-27 38 | 39 | * Add an optional `id` parameter to the `User` type. 40 | 41 | If you parse the URL for a user's photostream and use the URL that contains their NSID rather than their path alias, this ID will be included in the response. 42 | This allows for slightly faster lookups later. 43 | 44 | ## v1.7.1 - 2023-12-20 45 | 46 | * Add support for recognising URLs that use `/photo_edit.gne?id=[ID]` and `/photo.gne?short=[SHORT_ID]`. 47 | 48 | ## v1.7.0 - 2023-12-17 49 | 50 | Add support for recognising URLs as the Flickr homepage. 51 | 52 | There are lots of varieties of homepage URL that appear in e.g. links from Wikimedia Commons, and now they can be recognised: 53 | 54 | ```pycon 55 | >>> parse_flickr_url("www.flickr.com") 56 | {"type": "homepage"} 57 | ``` 58 | 59 | ## v1.6.1 - 2023-12-15 60 | 61 | * Fix a bug where the URL parser could throw an IndexError on URL fragments or empty strings. 62 | 63 | ## v1.6.0 - 2023-12-12 64 | 65 | * Add a new function `find_flickr_urls_in_text` which can be used to find Flickr URLs in a block of arbitrary text. 66 | Example: 67 | 68 | ```pycon 69 | >>> text = """ 70 | ... This is the help page: https://www.flickr.com/help 71 | ... 72 | ... This is a user: https://www.flickr.com/photos/mariakallin/ 73 | ... """ 74 | >>> find_flickr_urls_in_text(text) 75 | ['https://www.flickr.com/help', 'https://www.flickr.com/photos/mariakallin/'] 76 | ``` 77 | 78 | This is useful for text analysis. 79 | 80 | ## v1.5.3 - 2023-12-12 81 | 82 | Add support for parsing more varieties of URL, based on those seen in the Wikimedia Commons snapshots, including: 83 | 84 | * Old-style photo URLs that use `/photo/` instead of `/photos/`, e.g. 
`http://flickr.com/photo/17277074@N00/2619974961` 85 | * Photo URLs that use the `/photo_zoom.gne` path and similar `.gne` paths, e.g. `https://www.flickr.com/photo_zoom.gne?id=196155401&size=m` 86 | * A wide variety of `static.flickr.com`-like URLs; URLs that point to raw JPEGs rather than the photo description page 87 | 88 | ## v1.5.2 - 2023-12-12 89 | 90 | * Expand the support for parsing static URLs, e.g. `http://farm1.static.flickr.com/82/241abc183_dd0847d5c7_o.jpg` 91 | 92 | ## v1.5.1 - 2023-12-08 93 | 94 | * Expand the support for parsing static URLs, e.g. `https://photos12.flickr.com/16159487_3a6615a565_b.jpg` 95 | 96 | ## v1.5.0 - 2023-12-07 97 | 98 | * Add support for parsing static URLs, e.g. `https://live.staticflickr.com/65535/53381630964_63d765ee92_s.jpg` 99 | 100 | ## v1.4.0 - 2023-12-02 101 | 102 | * Drop support for Python 3.7 to Python 3.11; this library now requires Python 3.12. 103 | 104 | ## v1.3.0 - 2023-11-09 105 | 106 | * Add support for pagination. All collection URLs (albums, users, groups, galleries and tags) now include a `page` parameter that tells you what page you've navigated to in the Flickr UI. 107 | 108 | ## v1.2.4 - 2023-11-05 109 | 110 | * Explicitly export the `ParseResult` type. 111 | 112 | ## v1.2.3 - 2023-11-05 113 | 114 | * Actually make the type hints available by adding the `py.typed` file. 115 | 116 | ## v1.2.2 - 2023-10-31 117 | 118 | * Add type hints that can be used for type checking with mypy and similar tools. 119 | 120 | ## v1.2.1 - 2023-10-23 121 | 122 | * Add support for Flickr Guest Pass URLs, e.g. `https://www.flickr.com/gp/realphotomatt/M195SLkj98` 123 | 124 | ## v1.1.1 - 2023-10-20 125 | 126 | * Tighten up the definition of "numeric ID" in Flickr URLs, so only something that could plausibly be a numeric ID is allowed. 127 | 128 | e.g. 
`https://www.flickr.com/photos/circled/①②③` will no longer be returned as a plausible photo URL. 129 | 130 | ## v1.1.0 - 2023-10-18 131 | 132 | * Add the ability to run flickr_url_parser from the command line. 133 | * Remove some unnecessary dependencies from `setup.py`. 134 | 135 | ## v1.0.0 - 2023-10-17 136 | 137 | Initial release of this code as a standalone library. 138 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License.
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
# Matches a whole pagination path component such as ``page2``; the digits
# are captured as group 1.  Shared by ``get_page`` and ``is_page`` so the
# two functions can never disagree about what counts as pagination.
_PAGE_RE = re.compile(r"page([0-9]+)")


# The annotation is written as a string so that it is only interpreted by
# type checkers and never evaluated when the function is defined.
def get_page(url: "hyperlink.URL | hyperlink.DecodedURL") -> int:
    """
    Return the page number encoded in the final path component of ``url``.

    Flickr does pagination by appending a ``pageXX`` component to the end
    of URLs, e.g.

        https://www.flickr.com/photos/belindavick/
        https://www.flickr.com/photos/belindavick/page2
        https://www.flickr.com/photos/belindavick/page3

    ``url`` must be an already-parsed URL (something with a ``path``
    sequence), not a string:

        >>> get_page(hyperlink.parse('https://www.flickr.com/photos/belindavick/'))
        1

        >>> get_page(hyperlink.parse('https://www.flickr.com/photos/belindavick/page3'))
        3

    If the final component isn't a pagination component, or the URL has
    no path components at all, this is treated as the first page and the
    function returns 1.
    """
    # An explicit check rather than ``assert``, which is stripped under
    # ``python -O`` and would turn an empty path into an IndexError.
    if not url.path:
        return 1

    m = _PAGE_RE.fullmatch(url.path[-1])

    return int(m.group(1)) if m is not None else 1


def is_page(path_component: str) -> bool:
    """
    Returns True if a path component looks like pagination in a Flickr URL,
    False otherwise.

        >>> is_page("page3")
        True
        >>> is_page("albums")
        False

    """
    return _PAGE_RE.fullmatch(path_component) is not None


def is_digits(path_component: str) -> bool:
    """
    Returns True if ``path_component`` is a non-empty string of
    digits 0-9, False otherwise.

    Note: this is different from ``str.isdigit()`` or ``str.isnumeric()``,
    both of which admit a wider range of characters that we wouldn't
    expect to see in a Flickr URL.

        >>> '①'.isdigit()
        True
        >>> '½'.isnumeric()
        True

    """
    # ``fullmatch`` rather than ``match`` with ``^…$``: in the latter,
    # ``$`` also matches just before a trailing newline, so "123\n"
    # would wrongly be accepted.
    return re.fullmatch(r"[0-9]+", path_component) is not None


def looks_like_flickr_photo_id(text: str, /) -> bool:
    """
    Returns True if ``text`` looks like a Flickr photo ID, False otherwise.

        >>> looks_like_flickr_photo_id("10875442124")
        True
        >>> looks_like_flickr_photo_id("everydayfilms")
        False

    This does not mean that ``text`` definitely is the ID of a photo
    on Flickr, but it does allow us to reject some common mistakes.

    """
    return is_digits(text)


def looks_like_flickr_user_id(text: str, /) -> bool:
    """
    Returns True if ``text`` looks like a Flickr user ID, False otherwise.

        >>> looks_like_flickr_user_id("127885125@N05")
        True
        >>> looks_like_flickr_user_id("everydayfilms")
        False

    This does not mean that ``text`` definitely is the ID of a user
    on Flickr, but it does allow us to reject some common mistakes.

    """
    # ``fullmatch`` so that a trailing newline is not silently accepted.
    return re.fullmatch(r"[0-9]{5,11}@N[0-9]{2}", text) is not None
# Hosts that serve regular Flickr.com pages.
_FLICKR_PAGE_HOSTS = frozenset({"www.flickr.com", "flickr.com"})

# Hosts that serve static media files.  Some are fixed names, the rest
# carry a numeric suffix, e.g. farm1.static.flickr.com, photos12.flickr.com,
# c8.staticflickr.com.
_STATIC_HOSTS = frozenset({"live.staticflickr.com", "static.flickr.com"})
_FARM_HOST_RE = re.compile(r"farm[0-9]+\.static\.?flickr\.com")
_PHOTOS_HOST_RE = re.compile(r"photos[0-9]+\.flickr\.com")
_CDN_HOST_RE = re.compile(r"c[0-9]+\.staticflickr\.com")


def _is_static_host(host: str) -> bool:
    """Return True if ``host`` (already lowercased) is a static-file host."""
    return (
        host in _STATIC_HOSTS
        or _FARM_HOST_RE.fullmatch(host) is not None
        or _PHOTOS_HOST_RE.fullmatch(host) is not None
        or _CDN_HOST_RE.fullmatch(host) is not None
    )


def _drop_empty_path_components(
    u: hyperlink.URL | hyperlink.DecodedURL,
) -> hyperlink.URL | hyperlink.DecodedURL:
    """Return ``u`` with any empty path components removed."""
    return u.replace(path=tuple(component for component in u.path if component != ""))


def _single_photo_from_filename(filename: str) -> ParseResult | None:
    """
    Parse a static filename like ``53381630964_63d765ee92_s.jpg``, where
    the photo ID is everything before the first underscore.  Returns
    ``None`` if that prefix doesn't look like a photo ID.
    """
    photo_id = filename.split("_", 1)[0]

    if looks_like_flickr_photo_id(photo_id):
        return anonymous_single_photo(photo_id)

    return None


def _parse_redirect_target(url: str) -> ParseResult | None:
    """
    Fetch ``url``, follow any HTTP redirects, and parse the URL we end up at.

    This is best-effort: it returns ``None`` if the request fails, if we
    weren't redirected anywhere, or if the final URL can't be parsed.
    """
    try:
        redirected_url = str(httpx.get(url, follow_redirects=True).url)
    except (httpx.HTTPError, httpx.InvalidURL):
        return None

    # If we end up where we started, parsing the same URL again won't
    # tell us anything new.
    if redirected_url == url:
        return None

    try:
        # Called without ``follow_redirects``, so the nested call makes
        # no further HTTP requests.
        return parse_flickr_url(redirected_url)
    except (NotAFlickrUrl, UnrecognisedUrl):
        return None


def parse_flickr_url(url: str, *, follow_redirects: bool = False) -> ParseResult:
    """
    Parse a Flickr URL and return some key information, e.g. whether it's
    a single photo, an album, a user.

    The return value will be a dictionary with a key ``type`` and then some
    extra keys depending on the type, e.g.

        {"type": "single_photo", "photo_id": "50567413447"}

    Possible values for ``type``:

    -   ``homepage``
        No extra keys.

    -   ``single_photo``
        When the URL names the photo's owner, this includes ``photo_id``,
        ``user_url`` and ``user_id``.  ``user_id`` is ``None`` unless the
        URL contains the owner's NSID rather than their path alias.

        URLs that don't name the owner (short URLs, static files, ``.gne``
        pages) are built with ``anonymous_single_photo``.

    -   ``album``
        This will include ``album_id``, ``user_url`` and ``page``.
        Look up the user URL with Flickr's ``flickr.urls.lookupUser`` API.

    -   ``user``
        This will include ``user_url``, ``user_id`` and ``page``.
        ``user_id`` is ``None`` unless the URL contains the user's NSID.

        Look up the URL with Flickr's ``flickr.urls.lookupUser`` API,
        the ID with the ``flickr.people.getInfo`` API.

    -   ``group``
        This will include ``group_url`` and ``page``.
        Look up the URL with Flickr's ``flickr.urls.lookupGroup`` API.

    -   ``gallery``
        This will include ``gallery_id`` and ``page``.

    -   ``tag``
        This will include ``tag`` and ``page``.

    If you pass a URL which isn't a Flickr URL, or a Flickr URL which
    isn't recognised, then the function will throw ``NotAFlickrUrl``
    or ``UnrecognisedUrl`` exceptions.  If ``url`` isn't a string, it
    throws a ``TypeError``.

    Some Flickr URLs can only be parsed by making an HTTP request and
    following redirects (e.g. Guest Pass or short URLs).  To avoid
    unexpected HTTP requests, following redirects is disabled by default,
    and you must opt into it if you want to use this sort of URL.

    """
    if not isinstance(url, str):
        raise TypeError(f"Bad type for `url`: expected str, got {type(url).__name__}!")

    trimmed_url = url.rstrip("/")

    try:
        u = hyperlink.parse(trimmed_url)

    # This is for any string which can't be parsed as a URL,
    # e.g. `https://https://`
    #
    # Arguably some of those might be malformed URLs from flickr.com,
    # but it's a rare enough edge case that this is fine.
    except hyperlink.URLParseError as exc:
        raise NotAFlickrUrl(url) from exc

    # Remove any empty components in the path.
    #
    # e.g. typos like https://www.flickr.com/photos/joyoflife//44627174
    # In this case Flickr still resolves the single photo page, and we
    # can tell what the person meant even if it's not explicit.
    u = _drop_empty_path_components(u)

    # Handle URLs without a scheme, e.g.
    #
    #     flickr.com/photos/1234
    #
    # We know what the user means, but the hyperlink URL parsing library
    # thinks this is just the path component, not a sans-HTTP URL.
    #
    # These lines convert this to a full HTTPS URL, i.e.
    #
    #     https://flickr.com/photos/1234
    #
    # which allows the rest of the logic in the function to do
    # the "right thing" with this URL.
    if not url.startswith("http") and len(u.path) >= 1:
        first_component = u.path[0].lower()

        if (
            first_component in _FLICKR_PAGE_HOSTS
            or first_component == "flic.kr"
            or _is_static_host(first_component)
        ):
            # Strip empty components again, so that scheme-less URLs are
            # treated the same as their https:// equivalents.
            u = _drop_empty_path_components(hyperlink.parse("https://" + trimmed_url))

    # If this URL doesn't come from Flickr.com, then we can't possibly classify
    # it as a Flickr URL!
    #
    # Hostnames are case-insensitive, so compare against a lowercased copy.
    host = u.host.lower()

    is_long_url = host in _FLICKR_PAGE_HOSTS
    is_short_url = host == "flic.kr"
    is_static_url = _is_static_host(host)
    is_commons_explorer_url = host == "commons.flickr.org"

    if not (
        is_long_url or is_short_url or is_static_url or is_commons_explorer_url
    ):
        raise NotAFlickrUrl(url)

    # This is for short URLs that point to:
    #
    #   - albums, e.g. http://flic.kr/s/aHsjybZ5ZD
    #   - gallery, e.g. https://flic.kr/y/2Xry4Jt
    #   - people/users, e.g. https://flic.kr/ps/ZVcni
    #
    # Although we can base58 decode the album ID, that doesn't tell
    # us the user URL -- it goes to an intermediary "short URL" service,
    # and there's no obvious way in the API to go album ID -> user.
    if (
        follow_redirects
        and is_short_url
        and len(u.path) == 2
        and u.path[0] in {"s", "y", "ps"}
        and is_base58(u.path[1])
    ):
        result = _parse_redirect_target(url)
        if result is not None:
            return result

    # This is for "guest pass" URLs that point to:
    #
    #   - albums, e.g. https://www.flickr.com/gp/realphotomatt/M195SLkj98
    #     (from https://twitter.com/PAPhotoMatt/status/1715111983974940683)
    #   - single photos, e.g.
    #     https://www.flickr.com/gp/199246608@N02/nSN80jZ64E
    #     (this is one of mine)
    #
    # See https://www.flickrhelp.com/hc/en-us/articles/4404069601172-Create-or-delete-temporary-Guest-Passes-in-Flickr
    #
    # I don't think these guest pass URLs are deterministic -- they don't
    # contain base58 encoded IDs (notice that `nSN80jZ64E` has a `0`) and
    # the user can revoke them later.
    #
    # The easiest thing to do is to do an HTTP lookup.
    if follow_redirects and is_long_url and len(u.path) > 1 and u.path[0] == "gp":
        result = _parse_redirect_target(url)
        if result is not None:
            return result

    # The URL for the homepage, e.g. https://www.flickr.com/
    if is_long_url and len(u.path) == 0 and len(u.query) == 0 and u.fragment == "":
        return {"type": "homepage"}

    # The URL for a single photo, e.g.
    #
    #     https://www.flickr.com/photos/coast_guard/32812033543/
    #
    # or the old-style variant that uses `/photo/` instead of `/photos/`:
    #
    #     http://flickr.com/photo/17277074@N00/2619974961
    #
    # The old-style variant appears fairly regularly in e.g. Wikimedia
    # Commons – it no longer resolves, but there are enough of these
    # both on WMC and around the general web that I think this was once
    # a supported URL format.
    #
    # It's clear enough what this means that we should be able to
    # parse it, even if new URLs like this are no longer created.
    if (
        is_long_url
        and len(u.path) >= 3
        and u.path[0] in {"photos", "photo"}
        and looks_like_flickr_photo_id(u.path[2])
    ):
        # User IDs are matched case-insensitively here and returned
        # in their canonical uppercase form.
        if looks_like_flickr_user_id(u.path[1].upper()):
            return {
                "type": "single_photo",
                "photo_id": u.path[2],
                "user_url": f"https://www.flickr.com/photos/{u.path[1].upper()}/",
                "user_id": u.path[1].upper(),
            }
        else:
            return {
                "type": "single_photo",
                "photo_id": u.path[2],
                "user_url": f"https://www.flickr.com/photos/{u.path[1]}/",
                "user_id": None,
            }

    # The URL for a single photo, e.g.
    #
    #     https://flic.kr/p/2p4QbKN
    #
    # Here the final path component is a base-58 conversion of the photo ID.
    # See https://www.flickr.com/groups/51035612836@N01/discuss/72157616713786392/
    if is_short_url and len(u.path) == 2 and u.path[0] == "p" and is_base58(u.path[1]):
        return anonymous_single_photo(photo_id=base58_decode(u.path[1]))

    # Another variant of URL for a single photo, e.g.
    #
    #     https://www.flickr.com/photo_zoom.gne?id=196155401&size=m
    #     https://www.flickr.com/photo_exif.gne?id=1427904898
    #     www.flickr.com/photo.gne?id=105
    #     https://www.flickr.com/photo.gne?short=2ouuqFT
    #
    # Today this redirects to the /sizes/ or the /meta/ page, but it's quite
    # commonly used in e.g. Wikimedia Commons.
    if (
        is_long_url
        and len(u.path) == 1
        and u.path[0].startswith(("photo", "video"))
        and u.path[0].endswith(".gne")
        and len(u.get("id")) == 1
    ):
        photo_id = u.get("id")[0]

        if isinstance(photo_id, str) and looks_like_flickr_photo_id(photo_id):
            return anonymous_single_photo(photo_id)

    if (
        is_long_url
        and len(u.path) == 1
        and u.path[0] == "photo.gne"
        and len(u.get("short")) == 1
    ):
        short_id = u.get("short")[0]

        if isinstance(short_id, str) and is_base58(short_id):
            return anonymous_single_photo(photo_id=base58_decode(short_id))

    # The URL for an actual file, e.g.
    #
    #     https://live.staticflickr.com/65535/53381630964_63d765ee92_s.jpg
    #     http://static.flickr.com/63/155697786_0125559b4e.jpg
    #     http://farm1.static.flickr.com/82/241708183_dd0847d5c7_o.jpg
    #     https://farm5.staticflickr.com/4586/37767087695_bb4ecff5f4_o.jpg
    #
    # The exact format of these URLs is described in the Flickr docs:
    # https://www.flickr.com/services/api/misc.urls.html
    if (
        (host in _STATIC_HOSTS or _FARM_HOST_RE.fullmatch(host) is not None)
        and len(u.path) >= 2
        and is_digits(u.path[0])
    ):
        result = _single_photo_from_filename(u.path[1])
        if result is not None:
            return result

    # The URL for a static video file, e.g.
    #
    #     https://live.staticflickr.com/video/52868534222/346a41e5a9/1080p.mp4
    #
    if (
        host == "live.staticflickr.com"
        and len(u.path) >= 2
        and u.path[0] == "video"
        and looks_like_flickr_photo_id(u.path[1])
    ):
        return anonymous_single_photo(photo_id=u.path[1])

    # The URL for a static file, e.g.
    #
    #     https://photos12.flickr.com/16159487_3a6615a565_b.jpg
    #
    if _PHOTOS_HOST_RE.fullmatch(host) is not None and len(u.path) >= 1:
        result = _single_photo_from_filename(u.path[0])
        if result is not None:
            return result

    # The URL for a static file, e.g.
    #
    #     https://c8.staticflickr.com/6/5159/14288803431_7cf094b085_b.jpg
    #
    if (
        _CDN_HOST_RE.fullmatch(host) is not None
        and len(u.path) == 3
        and is_digits(u.path[0])
        and is_digits(u.path[1])
    ):
        result = _single_photo_from_filename(u.path[2])
        if result is not None:
            return result

    # The URL for an album, e.g.
    #
    #     https://www.flickr.com/photos/cat_tac/albums/72157666833379009
    #     https://www.flickr.com/photos/cat_tac/sets/72157666833379009
    #     https://www.flickr.com/photos/andygocher/albums/72157648252420622/page3
    #
    if (
        is_long_url
        and 4 <= len(u.path) <= 5
        and u.path[0] == "photos"
        and u.path[2] in {"albums", "sets"}
        and is_digits(u.path[3])
    ):
        return {
            "type": "album",
            "user_url": f"https://www.flickr.com/photos/{u.path[1]}/",
            "album_id": u.path[3],
            "page": get_page(u),
        }

    # The URL for a user, e.g.
    #
    #     https://www.flickr.com/photos/blueminds/
    #     https://www.flickr.com/people/blueminds/
    #     https://www.flickr.com/photos/blueminds/albums
    #     https://www.flickr.com/people/blueminds/page3
    #     https://www.flickr.com/photos/blueminds/?saved=1
    #
    if is_long_url and len(u.path) >= 2 and u.path[0] in {"photos", "people"}:
        user_url = f"https://www.flickr.com/photos/{u.path[1]}/"

        if looks_like_flickr_user_id(u.path[1]):
            user_id = u.path[1]
        else:
            user_id = None

        # ``page`` stays None for anything that isn't a user page we
        # recognise, so later checks (galleries, tags) still get a chance.
        if len(u.path) == 2:
            page = 1
        elif len(u.path) == 3 and u.path[2] == "albums":
            page = 1
        elif len(u.path) == 3 and is_page(u.path[2]):
            page = get_page(u)
        else:
            page = None

        if page is not None:
            return {
                "type": "user",
                "page": page,
                "user_url": user_url,
                "user_id": user_id,
            }

    # The URL for a member in the Commons Explorer, e.g.
    #
    #     https://commons.flickr.org/members/cadl_localhistory/
    #     https://commons.flickr.org/members/107895189@N03/
    #
    if is_commons_explorer_url and len(u.path) == 2 and u.path[0] == "members":
        user_url = f"https://www.flickr.com/photos/{u.path[1]}/"

        if looks_like_flickr_user_id(u.path[1]):
            user_id = u.path[1]
        else:
            user_id = None

        return {
            "type": "user",
            "page": 1,
            "user_url": user_url,
            "user_id": user_id,
        }

    # URLs for a group, e.g.
    #
    #     https://www.flickr.com/groups/slovenia/pool
    #     https://www.flickr.com/groups/slovenia
    #     https://www.flickr.com/groups/slovenia/pool/page16
    #
    if is_long_url and len(u.path) >= 2 and u.path[0] == "groups":
        if len(u.path) == 2:
            return {
                "type": "group",
                "group_url": f"https://www.flickr.com/groups/{u.path[1]}",
                "page": 1,
            }

        # The path has at least three components by this point.
        if u.path[2] == "pool":
            return {
                "type": "group",
                "group_url": f"https://www.flickr.com/groups/{u.path[1]}",
                "page": get_page(u),
            }

    # URLs for a gallery, e.g.
    #
    #     https://www.flickr.com/photos/flickr/gallery/72157722096057728/
    #     https://www.flickr.com/photos/flickr/gallery/72157722096057728/page2
    #     https://www.flickr.com/photos/flickr/galleries/72157690638331410/
    #
    if (
        is_long_url
        and len(u.path) >= 4
        and u.path[0] == "photos"
        and u.path[2] in {"gallery", "galleries"}
        and is_digits(u.path[3])
    ):
        return {"type": "gallery", "gallery_id": u.path[3], "page": get_page(u)}

    # URL for a tag, e.g.
    #
    #     https://flickr.com/photos/tags/tennis/
    #     https://flickr.com/photos/tags/fluorspar/page1
    #
    if (
        is_long_url
        and len(u.path) >= 3
        and u.path[0] == "photos"
        and u.path[1] == "tags"
    ):
        return {"type": "tag", "tag": u.path[2], "page": get_page(u)}

    # URL for the Flash player for a video, e.g.
    #
    #     https://www.flickr.com/apps/video/stewart.swf?photo_id=53262935176&…
    #
    if (
        is_long_url
        and u.path == ("apps", "video", "stewart.swf")
        and len(u.get("photo_id")) == 1
    ):
        photo_id = u.get("photo_id")[0]

        if isinstance(photo_id, str) and looks_like_flickr_photo_id(photo_id):
            return anonymous_single_photo(photo_id)

    raise UnrecognisedUrl(f"Unrecognised URL: {url}")
49 | "https://www.flickr.com/photos/fractions/½⅓¼⅕⅙⅐", 50 | "https://www.flickr.com/photos/circled/sets/①②③", 51 | "https://www.flickr.com/photos/numerics/galleries/Ⅰ፩൲〡", 52 | # A discussion page for a group 53 | "https://www.flickr.com/groups/slovenia/discuss/", 54 | # A malformed URL to a static photo 55 | "https://live.staticflickr.com/7372/help.jpg", 56 | "photos12.flickr.com/robots.txt", 57 | "http://farm1.static.flickr.com/82/241abc183_dd0847d5c7_o.jpg", 58 | "https://farm5.staticflickr.com/4586/377abc695_bb4ecff5f4_o.jpg", 59 | "https://c8.staticflickr.com/6/5159/142abc431_7cf094b085_b.jpg", 60 | "farm3.static.flickr.com", 61 | "https://www.flickr.com/photo.gne?short=-1", 62 | "https://www.flickr.com/apps/video/stewart.swf?photo_id=-1", 63 | "https://commons.flickr.org/", 64 | "https://commons.flickr.org/about/", 65 | ], 66 | ) 67 | def test_it_rejects_a_flickr_url_which_does_not_have_photos(url: str) -> None: 68 | """ 69 | URLs on a Flickr.com domain which can't be identified throw 70 | ``UnrecognisedUrl``. 71 | """ 72 | with pytest.raises(UnrecognisedUrl): 73 | parse_flickr_url(url) 74 | 75 | 76 | @pytest.mark.parametrize("protocol", ["http://", "https://", ""]) 77 | @pytest.mark.parametrize("host", ["flickr.com", "www.flickr.com"]) 78 | def test_it_can_parse_variations_of_url(protocol: str, host: str) -> None: 79 | """ 80 | A URL will be parsed consistently, even if there are variations in 81 | the protocol/domain. 
82 | """ 83 | url = f"{protocol}{host}/photos/coast_guard/32812033543" 84 | 85 | assert parse_flickr_url(url) == { 86 | "type": "single_photo", 87 | "photo_id": "32812033543", 88 | "user_url": "https://www.flickr.com/photos/coast_guard/", 89 | "user_id": None, 90 | } 91 | 92 | 93 | @pytest.mark.parametrize( 94 | "url", 95 | [ 96 | "https://www.flickr.com", 97 | "https://www.flickr.com/", 98 | "http://www.flickr.com", 99 | "http://www.flickr.com/", 100 | "https://flickr.com", 101 | "https://flickr.com/", 102 | "http://flickr.com", 103 | "http://flickr.com/", 104 | "www.flickr.com", 105 | "flickr.com", 106 | ], 107 | ) 108 | def test_it_can_parse_the_homepage(url: str) -> None: 109 | """ 110 | It can parse different forms of the homepage URL, varying by: 111 | 112 | * protocol 113 | * domain name 114 | * trailing slash or not 115 | 116 | """ 117 | assert parse_flickr_url(url) == {"type": "homepage"} 118 | 119 | 120 | @pytest.mark.parametrize( 121 | ["url", "single_photo"], 122 | [ 123 | ( 124 | "https://www.flickr.com/photos/coast_guard/32812033543", 125 | { 126 | "type": "single_photo", 127 | "photo_id": "32812033543", 128 | "user_url": "https://www.flickr.com/photos/coast_guard/", 129 | "user_id": None, 130 | }, 131 | ), 132 | ( 133 | "https://www.flickr.com/photos/coast_guard/32812033543/in/photolist-RZufqg-ebEcP7-YvCkaU-2dKrfhV-6o5anp-7ZjJuj-fxZTiu-2c1pGwi-JbqooJ-TaNkv5-ehrqn7-2aYFaRh-QLDxJX-2dKrdip-JB7iUz-ehrsNh-2aohZ14-Rgeuo3-JRwKwE-ksAR6U-dZVQ3m-291gkvk-26ynYWn-pHMQyE-a86UD8-9Tpmru-hamg6T-8ZCRFU-QY8amt-2eARQfP-qskFkD-2c1pG1Z-jbCpyF-fTBQDa-a89xfd-a7kYMs-dYjL51-5XJgXY-8caHdL-a89HZd-9GBmft-xy7PBo-sai77d-Vs8YPG-RgevC7-Nv5CF6-e4ZLn9-cPaxqS-9rnjS9-8Y7mhm", 134 | { 135 | "type": "single_photo", 136 | "photo_id": "32812033543", 137 | "user_url": "https://www.flickr.com/photos/coast_guard/", 138 | "user_id": None, 139 | }, 140 | ), 141 | ( 142 | "https://www.flickr.com/photos/britishlibrary/13874001214/in/album-72157644007437024/", 143 | { 144 | "type": "single_photo", 
145 | "photo_id": "13874001214", 146 | "user_url": "https://www.flickr.com/photos/britishlibrary/", 147 | "user_id": None, 148 | }, 149 | ), 150 | ( 151 | "https://www.Flickr.com/photos/techiedog/44257407", 152 | { 153 | "type": "single_photo", 154 | "photo_id": "44257407", 155 | "user_url": "https://www.flickr.com/photos/techiedog/", 156 | "user_id": None, 157 | }, 158 | ), 159 | ( 160 | "www.Flickr.com/photos/techiedog/44257407", 161 | { 162 | "type": "single_photo", 163 | "photo_id": "44257407", 164 | "user_url": "https://www.flickr.com/photos/techiedog/", 165 | "user_id": None, 166 | }, 167 | ), 168 | ( 169 | "https://www.flickr.com/photos/tanaka_juuyoh/1866762301/sizes/o/in/set-72157602201101937", 170 | { 171 | "type": "single_photo", 172 | "photo_id": "1866762301", 173 | "user_url": "https://www.flickr.com/photos/tanaka_juuyoh/", 174 | "user_id": None, 175 | }, 176 | ), 177 | # 178 | # Strictly speaking this URL is invalid, because of the lowercase @n02, 179 | # which should be uppercase. But we know what you meant, so parse it anyway. 
180 | ( 181 | "https://www.flickr.com/photos/11588490@n02/2174280796/sizes/l", 182 | { 183 | "type": "single_photo", 184 | "photo_id": "2174280796", 185 | "user_url": "https://www.flickr.com/photos/11588490@N02/", 186 | "user_id": "11588490@N02", 187 | }, 188 | ), 189 | ( 190 | "https://www.flickr.com/photos/nrcs_south_dakota/8023844010/in", 191 | { 192 | "type": "single_photo", 193 | "photo_id": "8023844010", 194 | "user_url": "https://www.flickr.com/photos/nrcs_south_dakota/", 195 | "user_id": None, 196 | }, 197 | ), 198 | ( 199 | "https://www.flickr.com/photos/chucksutherland/6738252077/player/162ed63802", 200 | { 201 | "type": "single_photo", 202 | "photo_id": "6738252077", 203 | "user_url": "https://www.flickr.com/photos/chucksutherland/", 204 | "user_id": None, 205 | }, 206 | ), 207 | ( 208 | "http://flickr.com/photo/17277074@N00/2619974961", 209 | { 210 | "type": "single_photo", 211 | "photo_id": "2619974961", 212 | "user_url": "https://www.flickr.com/photos/17277074@N00/", 213 | "user_id": "17277074@N00", 214 | }, 215 | ), 216 | ( 217 | "http://flickr.com/photo/art-sarah/2619974961", 218 | { 219 | "type": "single_photo", 220 | "photo_id": "2619974961", 221 | "user_url": "https://www.flickr.com/photos/art-sarah/", 222 | "user_id": None, 223 | }, 224 | ), 225 | ( 226 | "https://www.flickr.com/photos/gracewong/196155401/meta/", 227 | { 228 | "type": "single_photo", 229 | "photo_id": "196155401", 230 | "user_url": "https://www.flickr.com/photos/gracewong/", 231 | "user_id": None, 232 | }, 233 | ), 234 | # 235 | # From https://commons.wikimedia.org/wiki/File:75016-75017_Avenues_Foch_et_de_la_Grande_Armée_20050919.jpg 236 | # Retrieved 12 December 2023 237 | ( 238 | "https://www.flickr.com/photos/joyoflife//44627174", 239 | { 240 | "type": "single_photo", 241 | "photo_id": "44627174", 242 | "user_url": "https://www.flickr.com/photos/joyoflife/", 243 | "user_id": None, 244 | }, 245 | ), 246 | ], 247 | ) 248 | def test_it_parses_a_single_photo_with_user_info( 249 | 
url: str, single_photo: SinglePhoto 250 | ) -> None: 251 | """ 252 | It can parse different forms of single photo URL. 253 | """ 254 | assert parse_flickr_url(url) == single_photo 255 | 256 | 257 | @pytest.mark.parametrize( 258 | ["url", "photo_id"], 259 | [ 260 | ( 261 | "https://live.staticflickr.com/65535/53381630964_63d765ee92_s.jpg", 262 | "53381630964", 263 | ), 264 | ("photos12.flickr.com/16159487_3a6615a565_b.jpg", "16159487"), 265 | ("http://farm1.static.flickr.com/82/241708183_dd0847d5c7_o.jpg", "241708183"), 266 | ("farm1.static.flickr.com/82/241708183_dd0847d5c7_o.jpg", "241708183"), 267 | ("https://www.flickr.com/photo_zoom.gne?id=196155401&size=m", "196155401"), 268 | ("https://www.flickr.com/photo_exif.gne?id=1427904898", "1427904898"), 269 | # This URL is linked from https://commons.wikimedia.org/wiki/File:Adriaen_Brouwer_-_The_slaughter_feast.jpg 270 | ( 271 | "https://farm5.staticflickr.com/4586/37767087695_bb4ecff5f4_o.jpg", 272 | "37767087695", 273 | ), 274 | # 275 | # From https://commons.wikimedia.org/wiki/File:Maradona_Soccer_Aid.jpg 276 | # Retrieved 12 December 2023 277 | ("static.flickr.com/63/155697786_0125559b4e.jpg", "155697786"), 278 | ("http://static.flickr.com/63/155697786_0125559b4e.jpg", "155697786"), 279 | # 280 | # From https://commons.wikimedia.org/wiki/File:Ice_Cream_Stand_on_Denman_Island.jpg 281 | # Retrieved 12 December 2023 282 | ("www.flickr.com/photo.gne?id=105", "105"), 283 | # 284 | # From https://commons.wikimedia.org/wiki/File:IgnazioDanti.jpg 285 | # Retrieved 12 December 2023 286 | ("c8.staticflickr.com/6/5159/14288803431_7cf094b085_b.jpg", "14288803431"), 287 | # 288 | # From https://commons.wikimedia.org/wiki/File:The_Peace_Hat_and_President_Chester_Arthur,_1829_-_1886_(3435827496).jpg 289 | # Retrieved 20 December 2023 290 | ("www.flickr.com/photo_edit.gne?id=3435827496", "3435827496"), 291 | # 292 | # From 
https://commons.wikimedia.org/wiki/File:Mars_-_Valles_Marineris,_Melas_Chasma_-_ESA_Mars_Express_(52830681359).png 293 | # Retrieved 20 December 2023 294 | ("https://www.flickr.com/photo.gne?short=2ouuqFT", "52830949513"), 295 | # 296 | # This is the download URL from https://www.flickr.com/photos/196406308@N04/52947513801 297 | # Retrieved 2 January 2024 298 | ("https://www.flickr.com/video_download.gne?id=52947513801", "52947513801"), 299 | # 300 | # This is the download URL you get redirected to from 301 | # https://www.flickr.com/photos/83699771@N00/52868534222 302 | # Retrieved 2 January 2024 303 | ( 304 | "https://live.staticflickr.com/video/52868534222/346a41e5a9/1080p.mp4", 305 | "52868534222", 306 | ), 307 | # 308 | # This URL comes from the flickr.photos.getSizes API for 309 | # this photo. 310 | # Retrieved 2 January 2024 311 | ( 312 | "https://www.flickr.com/apps/video/stewart.swf?v=2968162862&photo_id=53262935176&photo_secret=06c382eee3", 313 | "53262935176", 314 | ), 315 | ], 316 | ) 317 | def test_it_parses_a_single_photo_without_user_info(url: str, photo_id: str) -> None: 318 | """ 319 | Parse variants of the single photo URL that don't give any information 320 | about the photo's owner. 321 | """ 322 | assert parse_flickr_url(url) == { 323 | "type": "single_photo", 324 | "photo_id": photo_id, 325 | "user_url": None, 326 | "user_id": None, 327 | } 328 | 329 | 330 | def test_it_parses_a_short_flickr_url() -> None: 331 | """ 332 | Parse a short URL which redirects to a single photo. 
333 | """ 334 | assert parse_flickr_url(url="https://flic.kr/p/2p4QbKN") == { 335 | "type": "single_photo", 336 | "photo_id": "53208249252", 337 | "user_url": None, 338 | "user_id": None, 339 | } 340 | 341 | 342 | @pytest.mark.parametrize( 343 | ["url", "album"], 344 | [ 345 | ( 346 | "https://www.flickr.com/photos/cat_tac/albums/72157666833379009", 347 | { 348 | "type": "album", 349 | "user_url": "https://www.flickr.com/photos/cat_tac/", 350 | "album_id": "72157666833379009", 351 | "page": 1, 352 | }, 353 | ), 354 | ( 355 | "https://www.flickr.com/photos/cat_tac/sets/72157666833379009", 356 | { 357 | "type": "album", 358 | "user_url": "https://www.flickr.com/photos/cat_tac/", 359 | "album_id": "72157666833379009", 360 | "page": 1, 361 | }, 362 | ), 363 | ( 364 | "https://www.flickr.com/photos/andygocher/albums/72157648252420622/page3", 365 | { 366 | "type": "album", 367 | "user_url": "https://www.flickr.com/photos/andygocher/", 368 | "album_id": "72157648252420622", 369 | "page": 3, 370 | }, 371 | ), 372 | ], 373 | ) 374 | def test_it_parses_an_album(url: str, album: Album) -> None: 375 | """ 376 | Parse album URLs. 377 | """ 378 | assert parse_flickr_url(url) == album 379 | 380 | 381 | @pytest.mark.parametrize( 382 | "url", 383 | [ 384 | pytest.param("http://flic.kr/s/aHsjybZ5ZD", id="http-aHsjybZ5ZD"), 385 | pytest.param("https://flic.kr/s/aHsjybZ5ZD", id="https-aHsjybZ5ZD"), 386 | ], 387 | ) 388 | def test_it_parses_a_short_album_url(vcr_cassette: Cassette, url: str) -> None: 389 | """ 390 | Parse short URLs which redirect to albums. 
391 | """ 392 | assert parse_flickr_url(url, follow_redirects=True) == { 393 | "type": "album", 394 | "user_url": "https://www.flickr.com/photos/64527945@N07/", 395 | "album_id": "72157628959784871", 396 | "page": 1, 397 | } 398 | 399 | 400 | @pytest.mark.parametrize( 401 | "url", 402 | [ 403 | pytest.param("http://flic.kr/s/", id="http-s"), 404 | pytest.param("http://flic.kr/s/---", id="dashes"), 405 | pytest.param("https://flic.kr/s/aaaaaaaaaaaaa", id="aaaaaaaaaaaaa"), 406 | ], 407 | ) 408 | def test_it_doesnt_parse_bad_short_album_urls(vcr_cassette: Cassette, url: str) -> None: 409 | """ 410 | Parsing a short URL which looks like an album but doesn't redirect 411 | to one throws ``UnrecognisedUrl`` 412 | """ 413 | with pytest.raises(UnrecognisedUrl): 414 | parse_flickr_url(url) 415 | 416 | 417 | @pytest.mark.parametrize( 418 | "url", 419 | [ 420 | "https://www.flickr.com/photos/blueminds/", 421 | "https://www.flickr.com/people/blueminds/", 422 | "https://www.flickr.com/photos/blueminds/albums", 423 | "https://www.flickr.com/photos/blueminds/?saved=1", 424 | ], 425 | ) 426 | def test_it_parses_a_user(url: str) -> None: 427 | """ 428 | Parse a user's profile URL with a path alias. 429 | """ 430 | assert parse_flickr_url(url) == { 431 | "type": "user", 432 | "user_url": "https://www.flickr.com/photos/blueminds/", 433 | "user_id": None, 434 | "page": 1, 435 | } 436 | 437 | 438 | @pytest.mark.parametrize( 439 | "url", 440 | [ 441 | "https://www.flickr.com/photos/47265398@N04/", 442 | "https://www.flickr.com/people/47265398@N04/", 443 | "https://www.flickr.com/photos/47265398@N04/albums", 444 | "https://www.flickr.com/photos/47265398@N04/?saved=1", 445 | ], 446 | ) 447 | def test_it_parses_a_user_with_id(url: str) -> None: 448 | """ 449 | Parse a user's profile URL with their NSID. 
450 | """ 451 | assert parse_flickr_url(url) == { 452 | "type": "user", 453 | "user_url": "https://www.flickr.com/photos/47265398@N04/", 454 | "user_id": "47265398@N04", 455 | "page": 1, 456 | } 457 | 458 | 459 | def test_parses_a_commons_explorer_url_with_path_alias() -> None: 460 | """ 461 | Parse a Commons Explorer member page URL with a path alias. 462 | """ 463 | url = "https://commons.flickr.org/members/swedish_heritage_board/" 464 | 465 | assert parse_flickr_url(url) == { 466 | "type": "user", 467 | "user_url": "https://www.flickr.com/photos/swedish_heritage_board/", 468 | "user_id": None, 469 | "page": 1, 470 | } 471 | 472 | 473 | def test_parses_a_commons_explorer_url_with_user_id() -> None: 474 | """ 475 | Parse a Commons Explorer member page URL with a user ID. 476 | """ 477 | url = "https://commons.flickr.org/members/107895189@N03/" 478 | 479 | assert parse_flickr_url(url) == { 480 | "type": "user", 481 | "user_url": "https://www.flickr.com/photos/107895189@N03/", 482 | "user_id": "107895189@N03", 483 | "page": 1, 484 | } 485 | 486 | 487 | def test_it_gets_page_information_about_user_urls() -> None: 488 | """ 489 | Get the page number from a paginated URL in a user's photostream. 490 | """ 491 | assert parse_flickr_url("https://www.flickr.com/photos/blueminds/page3") == { 492 | "type": "user", 493 | "user_url": "https://www.flickr.com/photos/blueminds/", 494 | "user_id": None, 495 | "page": 3, 496 | } 497 | 498 | 499 | def test_it_parses_a_short_user_url(vcr_cassette: Cassette) -> None: 500 | """ 501 | Parse a short URL which redirects to a user's photostream. 
502 | """ 503 | assert parse_flickr_url("https://flic.kr/ps/ZVcni", follow_redirects=True) == { 504 | "type": "user", 505 | "user_url": "https://www.flickr.com/photos/astrosamantha/", 506 | "user_id": None, 507 | "page": 1, 508 | } 509 | 510 | 511 | @pytest.mark.parametrize( 512 | "url", 513 | [ 514 | pytest.param("https://flic.kr/ps", id="ps"), 515 | pytest.param("https://flic.kr/ps/ZVcni/extra-bits", id="extra-bits"), 516 | pytest.param("https://flic.kr/ps/ZZZZZZZZZ", id="ZZZZZZZZZ"), 517 | ], 518 | ) 519 | @pytest.mark.parametrize("follow_redirects", [True, False]) 520 | def test_it_doesnt_parse_bad_short_user_urls( 521 | vcr_cassette: Cassette, url: str, follow_redirects: bool 522 | ) -> None: 523 | """ 524 | Parsing a short URL which has the `/ps` path component for a photostream 525 | but doesn't redirect to one throws ``UnrecognisedUrl`` 526 | """ 527 | with pytest.raises(UnrecognisedUrl): 528 | parse_flickr_url(url, follow_redirects=follow_redirects) 529 | 530 | 531 | @pytest.mark.parametrize( 532 | ["url", "group"], 533 | [ 534 | ( 535 | "https://www.flickr.com/groups/slovenia/pool/", 536 | { 537 | "type": "group", 538 | "group_url": "https://www.flickr.com/groups/slovenia", 539 | "page": 1, 540 | }, 541 | ), 542 | ( 543 | "https://www.flickr.com/groups/slovenia/", 544 | { 545 | "type": "group", 546 | "group_url": "https://www.flickr.com/groups/slovenia", 547 | "page": 1, 548 | }, 549 | ), 550 | ( 551 | "https://www.flickr.com/groups/slovenia/pool/page30", 552 | { 553 | "type": "group", 554 | "group_url": "https://www.flickr.com/groups/slovenia", 555 | "page": 30, 556 | }, 557 | ), 558 | ], 559 | ) 560 | def test_it_parses_a_group(url: str, group: Group) -> None: 561 | """ 562 | Parse URLs to a group. 
563 | """ 564 | assert parse_flickr_url(url) == group 565 | 566 | 567 | @pytest.mark.parametrize( 568 | ["url", "gallery"], 569 | [ 570 | ( 571 | "https://www.flickr.com/photos/flickr/gallery/72157722096057728/", 572 | {"type": "gallery", "gallery_id": "72157722096057728", "page": 1}, 573 | ), 574 | ( 575 | "https://www.flickr.com/photos/flickr/gallery/72157722096057728/page2", 576 | {"type": "gallery", "gallery_id": "72157722096057728", "page": 2}, 577 | ), 578 | ( 579 | "https://www.flickr.com/photos/flickr/galleries/72157722096057728/", 580 | {"type": "gallery", "gallery_id": "72157722096057728", "page": 1}, 581 | ), 582 | ], 583 | ) 584 | def test_it_parses_a_gallery(url: str, gallery: Gallery) -> None: 585 | """ 586 | Parse gallery URLs. 587 | """ 588 | assert parse_flickr_url(url) == gallery 589 | 590 | 591 | @pytest.mark.parametrize( 592 | "url", 593 | [ 594 | pytest.param("https://flic.kr/y/2Xry4Jt", id="https-2Xry4Jt"), 595 | pytest.param("http://flic.kr/y/2Xry4Jt", id="http-2Xry4Jt"), 596 | ], 597 | ) 598 | def test_it_parses_a_short_gallery(vcr_cassette: Cassette, url: str) -> None: 599 | """ 600 | Parse a short URL which redirects to a gallery. 601 | """ 602 | assert parse_flickr_url(url, follow_redirects=True) == { 603 | "type": "gallery", 604 | "gallery_id": "72157690638331410", 605 | "page": 1, 606 | } 607 | 608 | 609 | @pytest.mark.parametrize( 610 | "url", 611 | [ 612 | pytest.param("https://flic.kr/y/222222222222", id="222222222222"), 613 | pytest.param("http://flic.kr/y/!!!", id="!!!"), 614 | ], 615 | ) 616 | def test_it_doesnt_parse_bad_short_gallery_urls( 617 | vcr_cassette: Cassette, url: str 618 | ) -> None: 619 | """ 620 | Parsing a short URL which has the `/y` path component for a gallery 621 | but doesn't redirect to one throws ``UnrecognisedUrl``. 
622 | """ 623 | with pytest.raises(UnrecognisedUrl): 624 | parse_flickr_url(url) 625 | 626 | 627 | @pytest.mark.parametrize( 628 | ["url", "tag"], 629 | [ 630 | ( 631 | "https://flickr.com/photos/tags/fluorspar/", 632 | {"type": "tag", "tag": "fluorspar", "page": 1}, 633 | ), 634 | ( 635 | "https://flickr.com/photos/tags/fluorspar/page1", 636 | {"type": "tag", "tag": "fluorspar", "page": 1}, 637 | ), 638 | ( 639 | "https://flickr.com/photos/tags/fluorspar/page5", 640 | {"type": "tag", "tag": "fluorspar", "page": 5}, 641 | ), 642 | ], 643 | ) 644 | def test_it_parses_a_tag(url: str, tag: Tag) -> None: 645 | """ 646 | Parse tag URLs. 647 | """ 648 | assert parse_flickr_url(url) == tag 649 | 650 | 651 | GUEST_PASS_URL_TEST_CASES = [ 652 | # from https://twitter.com/PAPhotoMatt/status/1715111983974940683 653 | pytest.param( 654 | "https://www.flickr.com/gp/realphotomatt/M195SLkj98", 655 | { 656 | "type": "album", 657 | "user_url": "https://www.flickr.com/photos/realphotomatt/", 658 | "album_id": "72177720312002426", 659 | "page": 1, 660 | }, 661 | id="M195SLkj98", 662 | ), 663 | # one of mine (Alex's) 664 | pytest.param( 665 | "https://www.flickr.com/gp/199246608@N02/nSN80jZ64E", 666 | { 667 | "type": "single_photo", 668 | "photo_id": "53279364618", 669 | "user_url": "https://www.flickr.com/photos/199246608@N02/", 670 | "user_id": "199246608@N02", 671 | }, 672 | id="nSN80jZ64E", 673 | ), 674 | ] 675 | 676 | 677 | @pytest.mark.parametrize(["url", "expected"], GUEST_PASS_URL_TEST_CASES) 678 | def test_it_parses_guest_pass_urls( 679 | vcr_cassette: Cassette, url: str, expected: dict[str, str] 680 | ) -> None: 681 | """ 682 | Parse guest pass URLs. 683 | 684 | Note: Guest Pass URLs are used to give somebody access to content 685 | on Flickr, even if (1) the content is private or (2) the person 686 | looking at the content isn't logged in. 
687 | 688 | We should be a bit careful about test cases here, and only use 689 | Guest Pass URLs that have been shared publicly, to avoid accidentally 690 | sharing a public link to somebody's private photos. 691 | 692 | See https://www.flickrhelp.com/hc/en-us/articles/4404078163732-Change-your-privacy-settings 693 | """ 694 | assert parse_flickr_url(url, follow_redirects=True) == expected 695 | 696 | 697 | @pytest.mark.parametrize(["url", "expected"], GUEST_PASS_URL_TEST_CASES) 698 | def test_no_guest_pass_if_no_follow_redirects( 699 | url: str, expected: dict[str, str] 700 | ) -> None: 701 | """ 702 | Guest pass URLs aren't parsed if `follow_redirects=False`. 703 | """ 704 | with pytest.raises(UnrecognisedUrl): 705 | parse_flickr_url(url, follow_redirects=False) 706 | 707 | 708 | def test_it_doesnt_parse_a_broken_guest_pass_url(vcr_cassette: Cassette) -> None: 709 | """ 710 | Parsing a URL which has the `/gp` path component for a guest pass 711 | but doesn't redirect to one throws ``UnrecognisedUrl``. 712 | """ 713 | with pytest.raises(UnrecognisedUrl): 714 | parse_flickr_url( 715 | url="https://www.flickr.com/gp/1234/doesnotexist", follow_redirects=True 716 | ) 717 | 718 | 719 | def test_a_non_string_is_an_error() -> None: 720 | """ 721 | Parsing a non-string/non-URL value throws ``TypeError``. 
722 | """ 723 | with pytest.raises(TypeError, match="Bad type for `url`: expected str, got int!"): 724 | parse_flickr_url(url=-1) # type: ignore 725 | -------------------------------------------------------------------------------- /tests/fixtures/cassettes/test_it_doesnt_parse_a_broken_guest_pass_url.yml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '' 4 | headers: 5 | accept: 6 | - '*/*' 7 | accept-encoding: 8 | - gzip, deflate 9 | connection: 10 | - keep-alive 11 | host: 12 | - www.flickr.com 13 | user-agent: 14 | - python-httpx/0.24.1 15 | method: GET 16 | uri: https://www.flickr.com/gp/1234/doesnotexist 17 | response: 18 | content: "\n\n\n\tFlickr: Page Not Found\n\t\n\t\n\t\n\t\n\n\n\n\n\n\t\n\n\n\n\n\n\n\n\n\n\n\n\t\t\n\t\t\n\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n 204 | \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\t\n\n\n\n\n\n\t\n\n\n\n\n\n\n\n\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\n 256 | \t\t\t\t\t\t\t\n\n\n\n
\n\t\n\t
\n\n\t\t\n\n\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t
\n\n\t\t
\n\n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t
\n\t\t\t\n\t\t\t
    \n\t\t\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t
\n\t\t
\n\t
\n
\n\n\n\n\n\t\n\n\n\n\n\n\n\n\t\n\t\n
\n\n\t

Page Not Found

\n\n\t
\n\t\t

Oops! Looks like you followed 305 | a bad link.

\n\t\t

If you think this is a problem with Flickr, please 306 | tell us.

\n\t\t

Here's a link to the home page.

\n\t
\n\n\n
\n \t
\n \n\t
\n\t\t\t\t\t\t\t\t\t\t\t\t
\n
\n
\t\t\t\t\t\t\t\t\t
\n\t\n
\n\n\n\n\n\n\n
\n\n
\n\n\n\n\n\n\n\n\n\n\n\n\t\t\t\n\t\n\t\n\t\n\t\n\t\n\t\n\n\t\t\t\n\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" 387 | headers: 388 | Connection: 389 | - keep-alive 390 | Content-Type: 391 | - text/html; charset=utf-8 392 | Date: 393 | - Mon, 23 Oct 2023 09:21:48 GMT 394 | Transfer-Encoding: 395 | - chunked 396 | Via: 397 | - 1.1 3f56d86af987a5808c3846bdd32ffcf2.cloudfront.net (CloudFront) 398 | X-Amz-Cf-Id: 399 | - AoYKBUi1hP6mvCmVvTCmUt4Hpj7LVs3j3umfQ7fDzFgO9SlX2lI6hQ== 400 | X-Amz-Cf-Pop: 401 | - LHR50-P8 402 | X-Cache: 403 | - Error from cloudfront 404 | content-security-policy: 405 | - 'style-src ''unsafe-inline'' https://*.flickr.com https://*.staticflickr.com 406 | https://*.flickr.net https://*.braintreegateway.com https://*.kaptcha.com 407 | https://*.paypal.com http://api.flickr.com https://*.pinterest.com https://connect.facebook.net 408 | https://*.facebook.com https://*.maps.api.here.com https://*.maps.cit.api.here.com 409 | https://cdn.siftscience.com https://trustarc.mgr.consensu.org/ https://*.trustarc.com; 410 | default-src https://*.flickr.com https://*.staticflickr.com https://*.flickr.net 411 | https://*.braintreegateway.com https://*.kaptcha.com https://*.paypal.com 412 | http://api.flickr.com https://*.pinterest.com https://connect.facebook.net 413 | https://*.facebook.com https://*.maps.api.here.com https://*.maps.cit.api.here.com 414 | https://cdn.siftscience.com https://trustarc.mgr.consensu.org/ https://*.trustarc.com; 415 | img-src data: blob: https://*.flickr.com https://*.flickr.net http://*.flickr.net 416 | https://*.staticflickr.com http://*.staticflickr.com https://*.yimg.com https://*.yahoo.com 417 | https://image.maps.api.here.com https://*.paypal.com https://*.pinterest.com 418 | http://*.static-alpha.flickr.com https://connect.facebook.net https://*.facebook.com 419 | https://*.maps.api.here.com https://*.maps.cit.api.here.com https://creativecommons.org 420 | https://hexagon-analytics.com https://api.mapbox.com 
https://*.trustarc.com; 421 | media-src https://*.flickr.com https://*.flickr.net http://*.flickr.net https://*.staticflickr.com 422 | https://*.yahoo.com; script-src ''unsafe-inline'' ''unsafe-eval'' ''nonce-3dd01a797377a674f76b1446c5234d5b'' 423 | https://*.flickr.com http://*.flickr.net https://*.flickr.net https://*.staticflickr.com 424 | https://*.analytics.yahoo.com https://yep.video.yahoo.com https://video.media.yql.yahoo.com 425 | https://*.yahooapis.com https://*.braintreegateway.com https://*.paypalobjects.com 426 | https://connect.facebook.net https://*.facebook.com https://*.maps.api.here.com 427 | https://*.maps.cit.api.here.com https://cdn.siftscience.com https://consent.trustarc.com 428 | https://trustarc.mgr.consensu.org; connect-src https://*.flickr.com https://*.flickr.net 429 | http://*.flickr.net https://*.staticflickr.com https://geo.query.yahoo.com 430 | https://*.yahooapis.com http://api.flickr.com https://*.pinterest.com http://*.yahoo.com 431 | https://*.maps.api.here.com https://*.maps.cit.api.here.com https://cdn.siftscience.com 432 | https://*.trustarc.com https://com-flickr-prod1.mini.snplow.net; frame-ancestors 433 | https://*.flickr.com;' 434 | server: 435 | - Apache/2.4.57 (Ubuntu) 436 | set-cookie: 437 | - xb=971147; expires=Thu, 20-Oct-2033 09:21:48 GMT; Max-Age=315360000; path=/; 438 | domain=.flickr.com 439 | - localization=en-us%3Buk%3Bgb; expires=Mon, 20-Oct-2025 09:21:48 GMT; Max-Age=62899200; 440 | path=/; domain=.flickr.com 441 | - flrbp=1698052908-9e464d92c1c8e4d603c93d8e971009450a23bdac; expires=Sat, 20-Apr-2024 442 | 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 443 | - flrbgrp=1698052908-9e4456917ee54d24d621422eff29c947c65e9bed; expires=Sat, 444 | 20-Apr-2024 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 445 | - flrbgdrp=1698052908-31f1c3bed0eb4020496447695a216585b838cff7; expires=Sat, 446 | 20-Apr-2024 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 447 
| - flrbgmrp=1698052908-85fd68f9321edd5e727aba4cac736aafb8d9f101; expires=Sat, 448 | 20-Apr-2024 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 449 | - flrbrst=1698052908-3b7723c460a5a5302638ae426a7439f04ff5b63b; expires=Sat, 450 | 20-Apr-2024 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 451 | - flrtags=1698052908-914c7e395944cc02454ad6db9c0d40df189895e3; expires=Sat, 452 | 20-Apr-2024 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 453 | - flrbrp=1698052908-0e4f2b1f96940f4edaabfad64ab1248c9a3873d5; expires=Sat, 20-Apr-2024 454 | 09:21:48 GMT; Max-Age=15552000; path=/; domain=.flickr.com; HttpOnly 455 | - flrb=57; expires=Mon, 23-Oct-2023 11:21:48 GMT; Max-Age=7200; path=/; domain=.flickr.com; 456 | HttpOnly 457 | - ccc=%7B%22needsConsent%22%3Atrue%2C%22managed%22%3A0%2C%22changed%22%3A0%2C%22info%22%3A%7B%22cookieBlock%22%3A%7B%22level%22%3A0%2C%22blockRan%22%3A0%7D%7D%7D; 458 | expires=Wed, 22-Nov-2023 09:21:48 GMT; Max-Age=2592000; path=/; domain=.flickr.com 459 | - ccc=%7B%22needsConsent%22%3Atrue%2C%22managed%22%3A0%2C%22changed%22%3A0%2C%22info%22%3A%7B%22cookieBlock%22%3A%7B%22level%22%3A0%2C%22blockRan%22%3A1%7D%7D%7D; 460 | expires=Wed, 22-Nov-2023 09:21:48 GMT; Max-Age=2592000; path=/; domain=.flickr.com 461 | x-frame-options: 462 | - SAMEORIGIN 463 | http_version: HTTP/1.1 464 | status_code: 404 465 | version: 1 466 | --------------------------------------------------------------------------------