├── simple_repository_server ├── py.typed ├── tests │ ├── integration │ │ ├── __init__.py │ │ └── test_repo_dependency_injection.py │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ └── test_simple_router.py │ ├── unit │ │ ├── __init__.py │ │ └── test_utils.py │ ├── test_netrc_auth.py │ └── test__http_response_iterator.py ├── routers │ ├── __init__.py │ └── simple.py ├── __init__.py ├── _http_response_iterator.py ├── utils.py └── __main__.py ├── .gitignore ├── .github └── workflows │ ├── python-app.yml │ └── python-publish.yml ├── .pre-commit-config.yaml ├── LICENSE ├── pyproject.toml └── README.md /simple_repository_server/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | simple_repository_server/_version.py 2 | -------------------------------------------------------------------------------- /simple_repository_server/tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /simple_repository_server/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 
7 | -------------------------------------------------------------------------------- /simple_repository_server/routers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 7 | -------------------------------------------------------------------------------- /simple_repository_server/tests/api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 7 | -------------------------------------------------------------------------------- /simple_repository_server/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 
7 | -------------------------------------------------------------------------------- /simple_repository_server/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 7 | 8 | from ._version import version as __version__ # noqa 9 | 10 | __all__ = ['__version__'] 11 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | name: Python application 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python 3.11 20 | uses: actions/setup-python@v3 21 | with: 22 | python-version: "3.11" 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install .[dev] mypy 26 | - name: Type check 27 | run: | 28 | python -m mypy ./simple_repository_server 29 | - name: Test with pytest 30 | run: | 31 | python -m pytest ./simple_repository_server 32 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 
4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 7 | 8 | repos: 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: "v4.4.0" 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | - id: check-yaml 15 | - id: check-toml 16 | 17 | - repo: https://github.com/asottile/add-trailing-comma 18 | rev: "v2.4.0" 19 | hooks: 20 | - id: add-trailing-comma 21 | args: 22 | - "--py36-plus" 23 | 24 | - repo: https://github.com/PyCQA/isort 25 | rev: "5.12.0" 26 | hooks: 27 | - id: isort 28 | 29 | - repo: https://github.com/pycqa/flake8 30 | rev: "6.0.0" 31 | hooks: 32 | - id: flake8 33 | args: 34 | - "--max-line-length=120" 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 simple-repository 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package to PyPI when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | release-build: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.x" 28 | 29 | - name: Build release distributions 30 | run: | 31 | # NOTE: put your own distribution build steps here. 32 | python -m pip install build 33 | python -m build 34 | 35 | - name: Upload distributions 36 | uses: actions/upload-artifact@v4 37 | with: 38 | name: release-dists 39 | path: dist/ 40 | 41 | pypi-publish: 42 | runs-on: ubuntu-latest 43 | needs: 44 | - release-build 45 | permissions: 46 | # IMPORTANT: this permission is mandatory for trusted publishing 47 | id-token: write 48 | 49 | # Dedicated environments with protections for publishing are strongly recommended. 
50 | # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules 51 | environment: 52 | name: pypi 53 | url: https://pypi.org/p/simple-repository-server 54 | 55 | steps: 56 | - name: Retrieve release distributions 57 | uses: actions/download-artifact@v4 58 | with: 59 | name: release-dists 60 | path: dist/ 61 | 62 | - name: Publish release distributions to PyPI 63 | uses: pypa/gh-action-pypi-publish@release/v1 64 | with: 65 | packages-dir: dist/ 66 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 
7 | 8 | [build-system] 9 | requires = ["setuptools>=61", "setuptools_scm>=8"] 10 | build-backend = "setuptools.build_meta" 11 | 12 | [project] 13 | name = "simple-repository-server" 14 | dynamic = ["version"] 15 | requires-python = ">=3.11" 16 | classifiers = [ 17 | "Development Status :: 4 - Beta", 18 | "Programming Language :: Python :: 3", 19 | "Framework :: FastAPI", 20 | "Operating System :: OS Independent", 21 | "Typing :: Typed", 22 | ] 23 | authors = [ 24 | {name = "Phil Elson"}, 25 | {name = "Ivan Sinkarenko"}, 26 | {name = "Francesco Iannaccone"}, 27 | {name = "Wouter Koorn"}, 28 | ] 29 | dependencies = [ 30 | "httpx", 31 | "fastapi>=0.100.0", 32 | "packaging", 33 | "uvicorn[standard]", 34 | "simple-repository>=0.10.0", 35 | ] 36 | readme = "README.md" 37 | description = "A tool for running a PEP-503 simple Python package repository, including features such as dist metadata (PEP-658) and JSON API (PEP-691)" 38 | 39 | [project.urls] 40 | Homepage = "https://github.com/simple-repository/simple-repository-server" 41 | 42 | [project.optional-dependencies] 43 | test = [ 44 | "pytest", 45 | "pytest_asyncio", 46 | "pytest_httpx", 47 | "starlette>=0.26.1", 48 | "pytest_httpserver", 49 | ] 50 | dev = [ 51 | "simple-repository-server[test]", 52 | ] 53 | 54 | [project.scripts] 55 | simple-repository-server = "simple_repository_server.__main__:main" 56 | [tool.setuptools_scm] 57 | version_file = "simple_repository_server/_version.py" 58 | 59 | [tool.isort] 60 | py_version = 39 61 | line_length = 100 62 | multi_line_output = 3 63 | include_trailing_comma = true 64 | force_grid_wrap = 0 65 | use_parentheses = true 66 | ensure_newline_before_comments = true 67 | force_sort_within_sections = true 68 | 69 | [tool.mypy] 70 | python_version = "3.11" 71 | exclude = "simple_repository_server/tests" 72 | ignore_missing_imports = false 73 | strict = true 74 | 75 | [tool.setuptools.packages.find] 76 | namespaces = false 77 | 
@pytest.fixture
def netrc_file(tmp_path: Path) -> Path:
    """Create a temporary netrc file for testing."""
    netrc = tmp_path / 'my-netrc'
    netrc.write_text(
        textwrap.dedent("""\n
            machine gitlab.example.com
            login deploy-token-123
            password glpat-xxxxxxxxxxxxxxxxxxxx
        """),
    )
    return netrc


@pytest.fixture
def tmp_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Redirect ``Path.home()`` to an empty temporary directory.

    ``NETRC`` is removed from the environment as well, so that tests relying
    on the ``~/.netrc`` lookup are not influenced by the environment of the
    machine running the test suite. Tests needing ``NETRC`` set it themselves
    after this fixture has run.
    """
    homedir = tmp_path / 'my-home'
    homedir.mkdir()
    monkeypatch.setattr(Path, 'home', lambda: homedir)
    monkeypatch.delenv('NETRC', raising=False)
    return homedir


def test_get_netrc__path_not_in_home(tmp_home: Path, netrc_file: Path) -> None:
    """Test get_netrc_path returns None when no netrc file exists in the home directory."""
    result = get_netrc_path()
    assert result is None


def test_get_netrc__path_in_home(tmp_home: Path, netrc_file: Path) -> None:
    """Test get_netrc_path returns ~/.netrc when that file exists."""
    home_netrc = tmp_home / '.netrc'
    netrc_file.rename(home_netrc)
    result = get_netrc_path()
    assert result == home_netrc


def test_get_netrc__netrc_env_var(netrc_file: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Test get_netrc_path uses NETRC environment variable when file exists."""
    monkeypatch.setenv('NETRC', str(netrc_file))
    result = get_netrc_path()
    assert result == netrc_file


def test_get_netrc__netrc_env_var_nonexistent(
    tmp_home: Path,
    netrc_file: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test get_netrc_path returns None when NETRC points to non-existent file (no fallback)."""
    # Create ~/.netrc in home directory
    home_netrc = tmp_home / '.netrc'
    netrc_file.rename(home_netrc)

    # Set NETRC to non-existent file
    monkeypatch.setenv('NETRC', str(tmp_home / 'doesnt_exist'))
    result = get_netrc_path()

    # Should return None, NOT fall back to ~/.netrc
    assert result is None


def test_create_app__with_netrc(netrc_file: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Test that the app hands create_repository an httpx client using netrc auth."""
    monkeypatch.setenv('NETRC', str(netrc_file))
    with mock.patch(
        'simple_repository_server.__main__.create_repository',
    ) as mock_create_repository:
        app = create_app(["https://gitlab.example.com/simple/"])

        # Create a test client which will trigger the lifespan context
        with TestClient(app):
            pass

        # Verify create_repository was called
        assert mock_create_repository.called
        args, kwargs = mock_create_repository.call_args

        http_client = kwargs['http_client']

        # Verify it's an AsyncClient with NetRCAuth
        assert isinstance(http_client, httpx.AsyncClient)
        assert http_client._auth is not None
        assert isinstance(http_client._auth, httpx.NetRCAuth)
class HttpResponseIterator:
    """
    A class providing a generator to iterate over response body bytes from an httpx request.

    This class creates an iterator that allows you to iterate over the bytes of a response body
    obtained from an httpx request. Additionally, it provides access to the response status code
    and headers before the streaming response is constructed. It is particularly designed to be
    used with Starlette's streaming responses, enabling access to headers and status code before
    the response is returned by an API endpoint. The class will keep the httpx session alive
    until the entire response content is accessed.
    """

    # Names (lower-case) of the incoming request headers that are forwarded
    # to the upstream request. Any other header is dropped.
    PROXIED_REQUEST_HEADERS = {
        'accept',
        'user-agent',
        'accept-encoding',
        'if-unmodified-since',
        'if-range', 'if-none-match',
        'if-modified-since',
        'if-match',
        'range',
        'referer',
    }

    def __init__(self, http_client: httpx.AsyncClient, url: str):
        """
        Do not call the constructor of this class directly.
        Use HttpResponseIterator.create_iterator.
        """
        self.http_client = http_client
        self.url: str = url
        # The following attributes are only populated by ``create_iterator``.
        self.status_code: int
        self.headers: typing.Mapping[str, str]
        self._agen: typing.AsyncGenerator[bytes, None]

    def __aiter__(self) -> HttpResponseIterator:
        return self

    async def __anext__(self) -> bytes:
        return await self._agen.__anext__()

    @classmethod
    async def create_iterator(
        cls,
        http_client: httpx.AsyncClient,
        url: str,
        *,
        request_headers: typing.Mapping[str, str] | None = None,
    ) -> HttpResponseIterator:
        """
        Start a streaming GET request to ``url`` and return an iterator over its raw body.

        Only the headers of ``request_headers`` whose name (case-insensitively)
        is listed in ``PROXIED_REQUEST_HEADERS`` are sent with the request.
        When this coroutine returns, ``status_code`` and ``headers`` of the
        returned iterator are already set from the response.
        """
        # Instantiate through ``cls`` so that subclasses get an instance of
        # their own type rather than of the base class.
        iterator = cls(
            http_client=http_client,
            url=url,
        )
        request_headers = request_headers or {}
        headers = {
            header_name: header_value for header_name, header_value in request_headers.items()
            if header_name.lower() in cls.PROXIED_REQUEST_HEADERS
        }

        async def agenerator() -> typing.AsyncGenerator[bytes, None]:
            async with iterator.http_client.stream(
                method="GET",
                url=iterator.url,
                headers=headers,
            ) as resp:
                # Expose the response status and headers.
                iterator.status_code, iterator.headers = resp.status_code, resp.headers

                # The first time that anext is called, set status_code and
                # headers, without yielding the first byte of the stream.
                yield b""
                async for chunk in resp.aiter_raw(1024 * 1024):
                    yield chunk

        iterator._agen = agenerator()
        # Call anext to set the values of status_code and headers.
        await iterator.__anext__()

        return iterator
def url_as_relative(
    destination_absolute_url: str,
    origin_absolute_url: str,
) -> str:
    """Convert destination_absolute_url to a URL relative to origin_absolute_url.

    Both URLs must be http(s) URLs sharing the same scheme and network
    location, otherwise a ValueError is raised. Only the path component of
    the destination is used to build the result.
    """
    parsed_destination_url = urlparse(destination_absolute_url)
    parsed_origin_url = urlparse(origin_absolute_url)

    if (
        parsed_origin_url.scheme != parsed_destination_url.scheme or
        parsed_origin_url.scheme not in ["http", "https"] or
        parsed_origin_url.netloc != parsed_destination_url.netloc
    ):
        raise ValueError(
            "Cannot create a relative url from "
            f"{origin_absolute_url} to {destination_absolute_url}",
        )

    destination_absolute_path = parsed_destination_url.path
    origin_absolute_path = parsed_origin_url.path

    # Extract all the segments in the url contained between two "/"
    destination_path_tokens = destination_absolute_path.split("/")[1:-1]
    origin_path_tokens = origin_absolute_path.split("/")[1:-1]
    # Calculate the depth of the origin path. It will be the initial
    # number of dirs to delete from the url to get the relative path.
    dirs_up = len(origin_path_tokens)

    common_prefix = "/"
    for destination_path_token, origin_path_token in zip(
        destination_path_tokens, origin_path_tokens,
    ):
        if destination_path_token == origin_path_token:
            # If the two urls share a parent dir, reduce the number of dirs to delete
            dirs_up -= 1
            common_prefix += destination_path_token + "/"
        else:
            break

    return "../" * dirs_up + destination_absolute_path.removeprefix(common_prefix)


def relative_url_for(
    request: fastapi.Request,
    name: str,
    **kwargs: typing.Any,
) -> str:
    """Return the URL of the route called ``name``, relative to the URL of ``request``."""
    origin_url = str(request.url)
    destination_url = str(request.url_for(name, **kwargs))

    return url_as_relative(
        origin_absolute_url=origin_url,
        destination_absolute_url=destination_url,
    )


def replace_urls(
    project_page: model.ProjectDetail,
    project_name: str,
    request: fastapi.Request,
) -> model.ProjectDetail:
    """Return a copy of ``project_page`` whose file URLs point to the "resources" route.

    Each URL is computed from the project name and the file's filename, and
    is relative to the URL of ``request``.
    """
    files = tuple(
        replace(
            file,
            url=relative_url_for(
                request=request,
                name="resources",
                project_name=project_name,
                resource_name=file.filename,
            ),
        ) for file in project_page.files
    )
    return replace(project_page, files=files)


# Matches everything up to (and including) the first "{" of the user-agent,
# i.e. the prefix preceding the JSON payload.
PIP_HEADER_REGEX = re.compile(r'^.*?{')


def get_pip_version(
    request: fastapi.Request,
) -> Version | None:
    """Extract the installer version from the JSON payload of the User-Agent header.

    Returns None when the header is missing or empty, when it carries no
    valid JSON object, when the object has no "installer" mapping, or when
    the installer's "version" is not a string holding a valid version.
    """
    if not (pip_header_string := request.headers.get('user-agent', '')):
        return None
    # Strip whatever precedes the JSON object (e.g. "pip/23.0.1 ").
    pip_header = PIP_HEADER_REGEX.sub("{", pip_header_string)
    try:
        pip_info = json.loads(pip_header)
    except json.decoder.JSONDecodeError:
        return None
    if not isinstance(pip_info, dict):
        return None

    installer = pip_info.get('installer')
    if not isinstance(installer, dict):
        return None

    version_string = installer.get('version', '')
    if not isinstance(version_string, str):
        # The header is client-controlled: a JSON number or null would make
        # Version() raise TypeError, which is not an InvalidVersion.
        return None
    try:
        return Version(version_string)
    except InvalidVersion:
        return None
def _deflate(data: bytes) -> bytes:
    """Return ``data`` compressed as a raw deflate stream."""
    # See https://stackoverflow.com/a/22311297/741316
    # A compression object cannot be used again once it has been flushed, so
    # a new one is created for each call instead of sharing a module-level one.
    compressor = zlib.compressobj(4, zlib.DEFLATED, -zlib.MAX_WBITS)
    return compressor.compress(data) + compressor.flush()


@pytest.mark.parametrize(
    ['input_content'],
    [
        ["This is the response content".encode('utf-8')],
        [randbytes(1024 * 1024 * 3)],  # 3 pages of chunked content
    ],
    ids=['utf8_encoded_bytes', 'multi_page_bytestring'],
)
@pytest.mark.parametrize(
    ['encoding_name', 'encoder', 'decoder'],
    [
        ['gzip', zlib.compress, zlib.decompress],
        [
            'deflate',
            _deflate,
            lambda data: zlib.decompress(data, -zlib.MAX_WBITS),
        ],
        ['never-seen-before', lambda data: data + b'abc', lambda data: data[:-3]],
    ],
    ids=['gzip', 'deflate', 'never-seen-before'],
)
@pytest.mark.asyncio
async def test_http_response_iterator__response_remains_gzipped(
    httpserver: HTTPServer,
    input_content: bytes,
    encoding_name: str,
    encoder: typing.Callable[[bytes], bytes],
    decoder: typing.Callable[[bytes], bytes],
) -> None:
    # Serve some content as compressed bytes, and ensure that we can stream it
    # through the iterator (with the correct headers etc.).
    # We use a real test http server, to ensure that we are genuinely handling
    # gzipped responses correctly.
    compressed = encoder(input_content)
    httpserver.expect_request('/path').respond_with_data(
        compressed,
        headers={
            'content-type': 'application/octet-stream',
            'content-encoding': encoding_name,
        },
    )

    # Close the client (and its connections) once the body has been read.
    async with httpx.AsyncClient() as http_client:
        response_it = await HttpResponseIterator.create_iterator(
            http_client,
            httpserver.url_for('/path'),
            request_headers={'foo': 'bar', 'accept-encoding': 'gzip'},
        )

        assert response_it.headers['content-type'] == 'application/octet-stream'
        assert response_it.headers['content-encoding'] == encoding_name
        assert int(response_it.headers['content-length']) == len(compressed)
        content = b''.join([chunk async for chunk in response_it])

    assert len(content) == len(compressed)
    assert decoder(content) == input_content


@pytest.mark.asyncio
async def test_http_response_iterator__follows_redirects(
    httpserver: HTTPServer,
) -> None:
    # Test that the HttpResponseIterator follows redirects properly
    final_content = b"This is the final content after redirect"

    # Set up redirect chain: /redirect -> /final
    httpserver.expect_request('/final').respond_with_data(
        final_content,
        headers={'content-type': 'application/octet-stream'},
    )
    httpserver.expect_request('/redirect').respond_with_data(
        b"",
        status=302,
        headers={'location': httpserver.url_for('/final')},
    )

    # Close the client (and its connections) once the body has been read.
    async with httpx.AsyncClient(follow_redirects=True) as http_client:
        response_it = await HttpResponseIterator.create_iterator(
            http_client,
            httpserver.url_for('/redirect'),
        )

        # Should get the final content, not the redirect response
        assert response_it.status_code == 200
        content = b''.join([chunk async for chunk in response_it])

    assert content == final_content
def test_replace_urls() -> None:
    """replace_urls swaps each file URL for the one computed by relative_url_for."""
    page = model.ProjectDetail(
        meta=model.Meta("1.0"),
        name="numpy",
        files=(model.File("numpy-1.0-any.whl", "old_url", {}),),
    )

    with mock.patch("simple_repository_server.utils.relative_url_for", return_value="new_url"):
        page = utils.replace_urls(page, "numpy", mock.Mock())

    assert page.files == (
        model.File("numpy-1.0-any.whl", "new_url", {}),
    )


@pytest.mark.parametrize(
    "origin, destination, result", [
        (
            "https://simple-repository/simple/numpy/",
            "https://simple-repository/resources/numpy/numpy-1.0.whl",
            "../../resources/numpy/numpy-1.0.whl",
        ), (
            "https://simple-repository/simple/Numpy/",
            "https://simple-repository/simple/numpy/",
            "../numpy/",
        ), (
            "https://simple-repository/simple/Numpy",
            "https://simple-repository/simple/numpy",
            "numpy",
        ), (
            "https://simple-repository/simple/",
            "https://simple-repository/simple/numpy/",
            "numpy/",
        ), (
            "https://simple-repository/simple/",
            "https://simple-repository/simple/",
            "",
        ), (
            "https://simple-repository/simple",
            "https://simple-repository/simple",
            "simple",
        ), (
            "https://simple-repository/simple",
            "https://simple-repository/simple/",
            "simple/",
        ), (
            "https://simple-repository/simple/",
            "https://simple-repository/simple",
            "../simple",
        ), (
            "https://simple-repository/simple/project/numpy",
            "https://simple-repository/simple/",
            "../",
        ),
    ],
)
def test_url_as_relative(destination: str, origin: str, result: str) -> None:
    """url_as_relative returns the expected relative URL for same-host URLs."""
    assert utils.url_as_relative(
        destination_absolute_url=destination,
        origin_absolute_url=origin,
    ) == result


@pytest.mark.parametrize(
    "origin, destination", [
        (
            "http://simple-repository/simple/numpy/",
            "https://simple-repository/resources/numpy/numpy-1.0.whl",
        ), (
            "https://simple-repository/simple/Numpy/",
            "https://simple-repository2/simple/numpy/",
        ), (
            "https://simple-repository:81/simple/Numpy",
            "https://simple-repository:80/simple/numpy",
        ), (
            "https://simple-repository/simple/numpy/",
            "../tensorflow",
        ), (
            "../tensorflow",
            "https://simple-repository/simple/numpy/",
        ),
    ],
)
def test_url_as_relative__invalid(origin: str, destination: str) -> None:
    """url_as_relative raises ValueError when scheme or host differ, or a URL is not absolute."""
    with pytest.raises(
        ValueError,
        match=f"Cannot create a relative url from {origin} to {destination}",
    ):
        utils.url_as_relative(
            destination_absolute_url=destination,
            origin_absolute_url=origin,
        )


def test_relative_url_for() -> None:
    """relative_url_for passes the request URL and the route URL on as strings."""
    request_mock = mock.Mock(
        url=URL("https://url/number/one"),
        url_for=mock.Mock(return_value=URL("https://url/number/one")),
    )
    url_as_relative_mock = mock.Mock()

    with mock.patch("simple_repository_server.utils.url_as_relative", url_as_relative_mock):
        utils.relative_url_for(request=request_mock, name="name")

    url_as_relative_mock.assert_called_once_with(
        origin_absolute_url="https://url/number/one",
        destination_absolute_url="https://url/number/one",
    )


@pytest.mark.parametrize(
    "header, version", [
        ('pip/23.0.1 {"installer":{"name":"pip","version":"23.0.1"}}', Version("23.0.1")),
        ('{"installer":{"name":"pip","version":"23.0.1"}}', Version("23.0.1")),
        ('', None),
        ('*/*', None),
        ('pip/23.0.1 {"installer":{"name":"pip","version":"AAA"}}', None),
    ],
)
def test_get_pip_version(header: str, version: Version | None) -> None:
    """get_pip_version extracts the installer version from the user-agent header."""
    mock_request = mock.Mock(headers={"user-agent": header})
    # The comparison must be asserted, otherwise the test can never fail.
    assert utils.get_pip_version(mock_request) == version
def is_url(url: str) -> bool:
    """Return whether *url* has an ``http`` or ``https`` scheme."""
    scheme = urlparse(url).scheme
    return scheme == "http" or scheme == "https"
def create_repository(
    repository_urls: list[str],
    *,
    http_client: httpx.AsyncClient,
) -> SimpleRepository:
    """Build the repository served by the CLI from the given sources.

    Each entry with an http/https scheme becomes an ``HttpRepository``; any
    other entry is treated as a local directory and becomes a
    ``LocalRepository``. When more than one source is given they are combined
    with ``PrioritySelectedProjectsRepository`` in the order supplied. The
    result is always wrapped in a ``MetadataInjectorRepository``.

    Args:
        repository_urls: Repository URLs or local directory paths.
        http_client: Client shared by the HTTP repositories and the metadata
            injector.

    Raises:
        ValueError: If ``repository_urls`` is empty.
    """
    if not repository_urls:
        # Fail with a clear message rather than an IndexError further down.
        raise ValueError("At least one repository URL or directory path is required")

    base_repos: list[SimpleRepository] = [
        HttpRepository(url=repo_url, http_client=http_client)
        if is_url(repo_url)
        else LocalRepository(index_path=Path(repo_url))
        for repo_url in repository_urls
    ]

    repo: SimpleRepository
    if len(base_repos) > 1:
        repo = PrioritySelectedProjectsRepository(base_repos)
    else:
        repo = base_repos[0]
    return MetadataInjectorRepository(repo, http_client)
def handler(args: typing.Any) -> None:
    """Start the uvicorn server using the parsed command line arguments.

    Reads ``host``, ``port``, ``repository_url`` and
    ``stream_http_resources`` from ``args``.
    """
    host, port = args.host, args.port
    app = create_app(
        args.repository_url,
        stream_http_resources=args.stream_http_resources,
    )
    uvicorn.run(app=app, host=host, port=port)
class SimpleFactoryWithParams:
    """Callable repository factory that records the ``cutoff_date`` it was called with.

    Instances are passed as ``repo_factory`` to ``build_router``, so that the
    tests can check which ``cutoff_date`` value the factory received.
    """

    def __init__(self, repo: SimpleRepository) -> None:
        # The cutoff_date passed to the most recent call; None until called.
        self.cutoff_date: typing.Optional[str] = None
        self.repo = repo

    def __call__(self, cutoff_date: str) -> SimpleRepository:
        self.cutoff_date = cutoff_date
        # This factory just returns the original repo; a real factory could
        # return a more specific repo here.
        return self.repo
@pytest.mark.asyncio
async def test_repo_with_dependency_injection__project_page(
    empty_repo: SimpleRepository,
    repo_factory: SimpleFactoryWithParams,
):
    """The project page is served from the factory's repo, not the empty one."""
    test_client = TestClient(create_app(empty_repo, repo_factory=repo_factory))
    response = test_client.get(
        "/snapshot/2020-10-12/foo-bar/?format=application/vnd.pypi.simple.v1+json",
    )

    # The factory must have received the path parameter.
    assert repo_factory.cutoff_date == "2020-10-12"

    assert response.status_code == 200
    assert response.headers['content-type'] == 'application/vnd.pypi.simple.v1+json'

    payload = response.json()

    # The version sort order is not currently deterministic, so compare the
    # versions as a set, separately from the rest of the payload.
    assert set(payload.pop("versions")) == {"2.0", "3.0"}
    assert payload == {
        "meta": {
            "api-version": "1.1",
        },
        "name": "foo-bar",
        "files": [
            {
                "filename": filename,
                "url": f"../../../resources/foo-bar/{filename}",
                "hashes": {},
                "size": 1,
            }
            for filename in ("foo_bar-2.0-any.whl", "foo_bar-3.0-any.whl")
        ],
    }
19 | 20 | Run a Simple Repository Server 21 | 22 | positional arguments: 23 | repository-url Repository URL (http/https) or local directory path 24 | 25 | options: 26 | -h, --help show this help message and exit 27 | --port PORT 28 | --stream-http-resources 29 | Stream HTTP resources through this server instead of redirecting (default: redirect) 30 | ``` 31 | 32 | The simplest example is to mirror the Python Package Index: 33 | 34 | ```bash 35 | python -m simple_repository_server https://pypi.org/simple/ 36 | ``` 37 | 38 | This will run a server (on port 8000 by default). You can then use it with `pip` or `uv` with the 39 | appropriate configuration, for example: 40 | 41 | ```bash 42 | export PIP_INDEX_URL=http://localhost:8000/simple/ 43 | pip install some-package-to-install 44 | ``` 45 | 46 | Or with `uv`: 47 | 48 | ```bash 49 | export UV_INDEX_URL=http://localhost:8000/simple/ 50 | uv pip install some-package-to-install 51 | ``` 52 | 53 | ## Server capabilities 54 | 55 | If multiple repositories are provided to the CLI, the ``PrioritySelectedProjectsRepository`` component will be used to 56 | combine them together in a way that mitigates the [dependency confusion attack](https://medium.com/@alex.birsan/dependency-confusion-4a5d60fec610), with the first declared repository having the highest priority. 57 | 58 | The server handles PEP-691 content negotiation to serve either HTML or JSON formats. 59 | Per PEP-691, the default (fallback) content type is HTML, but a JSON response can 60 | be previewed in the browser by adding the ``?format=application/vnd.pypi.simple.v1+json`` 61 | querystring to any of the repository URLs. 62 | 63 | The server has been configured to include PEP-658 metadata, even if the upstream repository does 64 | not include such metadata. This is done on the fly, and as a result the distribution will be 65 | temporarily downloaded (in the case of HTTP) to the server in order to extract and serve the metadata.
66 | 67 | It is possible to use the resulting repository as input for the 68 | [``simple-repository-browser``](https://github.com/simple-repository/simple-repository-browser), which 69 | offers a web interface to browse and search packages in any simple package repository (PEP-503), 70 | inspired by PyPI / warehouse. 71 | 72 | It is expected that as new features appear in the underlying ``simple-repository`` library, those 73 | which make general sense to enable by default will be introduced into the CLI without providing a 74 | mechanism to disable those features. For more control, please see the "Non CLI usage" section. 75 | 76 | ## Repository sources 77 | 78 | The server can work with both remote repositories and local directories: 79 | 80 | ```bash 81 | # Remote repository 82 | python -m simple_repository_server https://pypi.org/simple/ 83 | 84 | # Local directory 85 | python -m simple_repository_server /path/to/local/packages/ 86 | 87 | # Multiple sources (priority order, local having precedence) 88 | python -m simple_repository_server /path/to/local/packages/ https://pypi.org/simple/ 89 | ``` 90 | 91 | Local directories should be organised with each project in its own subdirectory using the 92 | canonical package name (lowercase, with hyphens instead of underscores): 93 | 94 | ``` 95 | /path/to/local/packages/ 96 | ├── my-package/ 97 | │ ├── my_package-1.0.0-py3-none-any.whl 98 | │ └── my-package-1.0.0.tar.gz 99 | └── another-package/ 100 | └── another_package-2.1.0-py3-none-any.whl 101 | ``` 102 | 103 | If metadata files are in the local repository they will be served directly; otherwise they 104 | will be extracted on-the-fly and served. 105 | 106 | ## Authentication 107 | 108 | The server automatically supports netrc-based authentication for private HTTP repositories. 109 | If a `.netrc` file exists in your home directory or is specified via the `NETRC` environment 110 | variable, the server will use those credentials when accessing HTTP repositories. If `NETRC` is set but does not point to an existing file, the server does not fall back to `~/.netrc`.
111 | 112 | ## Resource handling 113 | 114 | By default, HTTP resource requests (e.g. wheel downloads) are redirected to their original URLs 115 | (302 redirect). 116 | To stream resources through the server instead, use the `--stream-http-resources` CLI flag. 117 | 118 | **Redirecting (default) is suitable for:** 119 | - Most public repository scenarios 120 | - When bandwidth and server processing overhead are considerations 121 | 122 | **Streaming is useful for:** 123 | - Air-gapped environments where clients cannot access upstream URLs directly 124 | - When the server has authentication credentials that clients lack 125 | 126 | ## Non CLI usage 127 | 128 | This project provides a number of tools in order to build a repository service using FastAPI. 129 | For cases when control of the repository configuration is required, and where details of the 130 | ASGI environment need more precise control, it is expected that ``simple-repository-server`` is used 131 | as a library instead of a CLI. 132 | 133 | Currently, the API for this functionality is under development, and will certainly change in the 134 | future. 135 | 136 | ## License and Support 137 | 138 | This code has been released under the MIT license. 139 | It is an initial prototype which is developed in-house, and _not_ currently openly developed. 140 | 141 | It is hoped that the release of this prototype will trigger interest from other parties that have similar needs. 142 | With sufficient collaborative interest there is the potential for the project to be openly 143 | developed, and to power Python package repositories across many domains. 144 | 145 | Please get in touch at https://github.com/orgs/simple-repository/discussions to share how 146 | this project may be useful to you. This will help us to gauge the level of interest and 147 | provide valuable insight when deciding whether to commit future resources to the project. 
def get_response_format(
    request: fastapi.Request,
    format: str | None = None,
) -> content_negotiation.Format:
    """
    A fastapi dependency which selects the response format for a request,
    optionally via a PEP-691 format querystring, for example:

        /simple/some-project/?format=application/vnd.pypi.simple.v1+json

    When no ``format`` querystring is given, the request's ``Accept`` header
    is used instead (an empty string if the header is absent).

    Raises:
        HTTPException: With status 406 when the requested format is not a
            supported serialization.

    """
    if format:
        # Allow the consumer to request a format as a query string such as
        # {URL}?format=application/vnd.pypi.simple.v1+json
        # Note: + in urls are interpreted as spaces by
        # urllib.parse.parse_qsl, used by FastAPI.
        requested_format = format.replace(" ", "+")
    else:
        requested_format = request.headers.get("Accept", "")

    try:
        return content_negotiation.select_response_format(
            content_type=requested_format,
        )
    except errors.UnsupportedSerialization as e:
        # Chain the cause, consistent with the error handling of the
        # resources end-point in this module.
        raise HTTPException(status_code=406, detail=str(e)) from e
83 | def repo_factory() -> SimpleRepository: 84 | return resource_repository 85 | 86 | simple_router = APIRouter( 87 | tags=["simple"], 88 | default_response_class=HTMLResponse, 89 | ) 90 | #: To be fixed by https://github.com/tiangolo/fastapi/pull/2763 91 | get = functools.partial(simple_router.api_route, methods=["HEAD", "GET"]) 92 | 93 | @get(prefix) 94 | async def project_list( 95 | response_format: typing.Annotated[content_negotiation.Format, Depends(get_response_format)], 96 | repository: typing.Annotated[SimpleRepository, Depends(repo_factory)], 97 | ) -> Response: 98 | project_list = await repository.get_project_list() 99 | 100 | serialized_project_list = serializer.serialize( 101 | page=project_list, 102 | format=response_format, 103 | ) 104 | 105 | return Response( 106 | serialized_project_list, 107 | media_type=response_format.value, 108 | ) 109 | 110 | @get(prefix + "{project_name}/") 111 | async def simple_project_page( 112 | request: fastapi.Request, 113 | project_name: str, 114 | repository: typing.Annotated[SimpleRepository, Depends(repo_factory)], 115 | response_format: typing.Annotated[content_negotiation.Format, Depends(get_response_format)], 116 | ) -> Response: 117 | normed_prj_name = packaging.utils.canonicalize_name(project_name) 118 | if normed_prj_name != project_name: 119 | # Update the original path params with the normed name. 120 | path_params = request.path_params | {'project_name': normed_prj_name} 121 | correct_url = utils.relative_url_for( 122 | request=request, 123 | name="simple_project_page", 124 | **path_params, 125 | ) 126 | if request.url.query: 127 | correct_url = correct_url + "?" + request.url.query 128 | return RedirectResponse( 129 | url=correct_url, 130 | status_code=301, 131 | ) 132 | 133 | try: 134 | package_releases = await repository.get_project_page(project_name) 135 | except errors.PackageNotFoundError as e: 136 | raise HTTPException(404, str(e)) 137 | 138 | # Point all resource URLs to this router. 
The router may choose to redirect these 139 | # back to the original source, but this means that all resource requests go through 140 | # this server (it may be desirable to be able to disable this behaviour in the 141 | # future, though it would mean that there is the potential for a SimpleRepository 142 | # to have implemented a resource handler, yet it never sees the request). 143 | project_releases = utils.replace_urls(package_releases, project_name, request) 144 | 145 | serialized_project_page = serializer.serialize( 146 | page=project_releases, 147 | format=response_format, 148 | ) 149 | return Response( 150 | serialized_project_page, 151 | media_type=response_format.value, 152 | ) 153 | 154 | @get("/resources/{project_name}/{resource_name}") 155 | async def resources( 156 | request: fastapi.Request, 157 | resource_name: str, 158 | project_name: str, 159 | ) -> fastapi.Response: 160 | 161 | req_ctx = model.RequestContext( 162 | context=dict(request.headers.items()), 163 | ) 164 | 165 | try: 166 | resource = await resource_repository.get_resource( 167 | project_name, 168 | resource_name, 169 | request_context=req_ctx, 170 | ) 171 | except errors.ResourceUnavailable as e: 172 | raise HTTPException(status_code=404, detail=str(e)) from e 173 | except errors.InvalidPackageError as e: 174 | raise HTTPException(status_code=502, detail=str(e)) from e 175 | 176 | if isinstance(resource, model.TextResource): 177 | # Use the first 12 characters of the metadata digest as ETag 178 | text_hash = hashlib.sha256(resource.text.encode('UTF-8')).hexdigest()[:12] 179 | etag = f'"{text_hash}"' 180 | response_headers = {'ETag': etag} 181 | if etag == request.headers.get("if-none-match"): 182 | raise HTTPException( 183 | 304, 184 | headers=response_headers, 185 | ) 186 | return PlainTextResponse( 187 | content=resource.text, 188 | headers=response_headers, 189 | ) 190 | 191 | if isinstance(resource, model.HttpResource): 192 | if stream_http_resources: 193 | response_iterator = 
await HttpResponseIterator.create_iterator( 194 | http_client=http_client, 195 | url=resource.url, 196 | request_headers=request.headers, 197 | ) 198 | return StreamingResponse( 199 | content=response_iterator, 200 | status_code=response_iterator.status_code, 201 | headers=response_iterator.headers, 202 | ) 203 | else: 204 | return RedirectResponse(url=resource.url, status_code=302) 205 | 206 | if isinstance(resource, model.LocalResource): 207 | ctx_etag = resource.context.get("etag") 208 | response_headers = {"ETag": ctx_etag} if ctx_etag else {} 209 | if client_etag := request.headers.get("if-none-match"): 210 | if client_etag == ctx_etag: 211 | raise HTTPException( 212 | 304, 213 | headers=response_headers, 214 | ) 215 | return FileResponse( 216 | path=resource.path, 217 | media_type="application/octet-stream", 218 | headers=response_headers, 219 | ) 220 | 221 | raise ValueError("Unsupported resource type") 222 | 223 | return simple_router 224 | -------------------------------------------------------------------------------- /simple_repository_server/tests/api/test_simple_router.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, CERN 2 | # This software is distributed under the terms of the MIT 3 | # licence, copied verbatim in the file "LICENSE". 4 | # In applying this license, CERN does not waive the privileges and immunities 5 | # granted to it by virtue of its status as Intergovernmental Organization 6 | # or submit itself to any jurisdiction. 
@pytest.fixture
def client(tmp_path: pathlib.Path, mock_repo: mock.AsyncMock) -> typing.Generator[TestClient, None, None]:
    """Yield a ``TestClient`` for an app that serves ``mock_repo`` via the simple router.

    The router is built with its default prefix and without resource
    streaming. ``tmp_path`` is requested but not used in the fixture body.
    """
    app = FastAPI()
    http_client = httpx.AsyncClient()
    app.include_router(simple_router.build_router(mock_repo, http_client=http_client))

    # Entering the TestClient context keeps the client open for the duration
    # of the test that requested this fixture.
    with TestClient(app) as test_client:
        yield test_client
59 | 60 | """ 61 | 62 | response = client.get("/simple/", headers=headers) 63 | assert response.status_code == 200 64 | assert response.text == expected 65 | 66 | 67 | @pytest.mark.parametrize( 68 | "headers", [{}, {"Accept": "text/html"}, {"Accept": "*/*"}], 69 | ) 70 | def test_simple_project_page(client: TestClient, headers: dict[str, str], mock_repo: mock.AsyncMock) -> None: 71 | assert isinstance(client.app, FastAPI) 72 | mock_repo.get_project_page.return_value = model.ProjectDetail( 73 | meta=model.Meta("1.0"), 74 | name="name", 75 | files=(model.File("name.whl", "original_url", {}),), 76 | ) 77 | 78 | expected = """ 79 | 80 | 81 | 82 | Links for name 83 | 84 | 85 |

Links for name

86 | name.whl
def test_simple_package_releases__package_not_found(client: TestClient, mock_repo: mock.AsyncMock) -> None:
    """A ``PackageNotFoundError`` from the repository is reported as a 404.

    The test is synchronous: ``TestClient`` is a blocking client and nothing
    here is awaited, so no asyncio marker or coroutine is needed.
    """
    assert isinstance(client.app, FastAPI)
    mock_repo.get_project_page.side_effect = errors.PackageNotFoundError(
        package_name="ghost",
    )

    # Request the canonical (trailing slash) URL so that the project page
    # handler is hit directly rather than via an implicit redirect.
    response = client.get("/simple/ghost/")
    assert response.status_code == 404
    assert response.json() == {
        'detail': "Package 'ghost' was not found in "
                  "the configured source",
    }
def test_unsupported_serialization(client: TestClient) -> None:
    """Unsupported ``Accept`` values are rejected with 406 on both simple end-points."""
    cases = (
        ("/simple/", "pizza/margherita"),
        ("/simple/numpy/", "application/vnd.pypi.simple.v2+html"),
    )
    for path, accept in cases:
        response = client.get(path, headers={"accept": accept})
        assert response.status_code == 406
def test_simple_project_list_json(client: TestClient, mock_repo: mock.AsyncMock) -> None:
    """The project list is serialized as JSON when the JSON media type is requested."""
    assert isinstance(client.app, FastAPI)
    projects = frozenset([model.ProjectListElement("a")])
    mock_repo.get_project_list.return_value = model.ProjectList(
        meta=model.Meta("1.0"),
        projects=projects,
    )

    json_headers = {"accept": "application/vnd.pypi.simple.v1+json"}
    response = client.get("/simple/", headers=json_headers)

    assert response.status_code == 200
    assert response.text == '{"meta": {"api-version": "1.0"}, "projects": [{"name": "a"}]}'
projects=frozenset([ 247 | model.ProjectListElement("a"), 248 | ]), 249 | ) 250 | 251 | response = client.get(f"/simple/?format={url_format}") 252 | assert response.headers.get("content-type") == url_format 253 | 254 | 255 | @pytest.mark.parametrize( 256 | ['headers', 'expected_return_code'], 257 | [ 258 | [{}, 200], 259 | [{"If-None-Match": '"45447b7afbd5"'}, 304], 260 | [{"If-None-Match": '"not-the-etag"'}, 200], 261 | ], 262 | ) 263 | def test_get_resource__metadata( 264 | client: TestClient, 265 | mock_repo: mock.AsyncMock, 266 | headers: dict[str, str], 267 | expected_return_code: int, 268 | ) -> None: 269 | assert isinstance(client.app, FastAPI) 270 | mock_repo.get_resource.return_value = model.TextResource( 271 | text="metadata", 272 | ) 273 | expected_etag = '"45447b7afbd5"' 274 | 275 | response = client.get("/resources/numpy/numpy-1.0-ciao.whl.metadata", headers=headers) 276 | assert response.status_code == expected_return_code 277 | # The etag must always be returned, see the following for details: 278 | # https://github.com/simple-repository/simple-repository-server/issues/6#issue-2317360891 279 | assert response.headers.get("etag") == expected_etag 280 | 281 | 282 | @pytest.mark.parametrize( 283 | ['headers', 'expected_return_code'], 284 | [ 285 | [{}, 200], 286 | [{"If-None-Match": '"430fddbf0a7ab4aebc1389262dbe2404"'}, 304], 287 | [{"If-None-Match": '"not-the-etag"'}, 200], 288 | ], 289 | ) 290 | def test_get_resource__local( 291 | client: TestClient, 292 | mock_repo: mock.AsyncMock, 293 | tmp_path: pathlib.Path, 294 | headers: dict[str, str], 295 | expected_return_code: int, 296 | ) -> None: 297 | local_resource = tmp_path / "my_file" 298 | local_resource.write_text("hello!") 299 | expected_tag = '"430fddbf0a7ab4aebc1389262dbe2404"' 300 | 301 | assert isinstance(client.app, FastAPI) 302 | mock_repo.get_resource.return_value = model.LocalResource( 303 | path=local_resource, 304 | context=model.Context(etag=expected_tag), 305 | ) 306 | 307 | response = 
client.get("/resources/numpy/numpy-1.0-ciao.whl", headers=headers) 308 | assert response.status_code == expected_return_code 309 | # The etag must always be returned, see the following for details: 310 | # https://github.com/simple-repository/simple-repository-server/issues/6#issue-2317360891 311 | assert response.headers.get("etag") == expected_tag 312 | --------------------------------------------------------------------------------