├── .activate.sh
├── .deactivate.sh
├── .github
└── workflows
│ ├── ci.yaml
│ └── publish-github-pages.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── Makefile
├── RATIONALE.md
├── README.md
├── dumb_pypi
├── __init__.py
├── main.py
└── templates
│ ├── _base.html
│ ├── changelog.html
│ ├── index.html
│ ├── package.html
│ └── simple.html
├── requirements-dev.txt
├── setup.cfg
├── setup.py
├── testing.py
├── testing
├── extract-links
├── package-list
├── package-list-huge
├── package-list-json
├── package-list-to-fake-json
├── previous-package-list-json
└── random-packages
├── tests
├── __init__.py
├── conftest.py
├── integration_test.py
└── main_test.py
└── tox.ini
/.activate.sh:
--------------------------------------------------------------------------------
1 | venv/bin/activate
--------------------------------------------------------------------------------
/.deactivate.sh:
--------------------------------------------------------------------------------
1 | deactivate
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches: [master]
4 | tags: '*'
5 | pull_request:
6 | jobs:
7 | tox:
8 | runs-on: ubuntu-latest
9 | strategy:
10 | fail-fast: false
11 | matrix:
12 | python: ['3.9', '3.10', '3.11']
13 | steps:
14 | - uses: actions/checkout@v3
15 | - uses: actions/setup-python@v4
16 | with:
17 | python-version: ${{ matrix.python }}
18 | - run: pip install tox
19 | - run: tox -e py
20 |
--------------------------------------------------------------------------------
/.github/workflows/publish-github-pages.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches: [master]
4 | jobs:
5 | publish:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - uses: actions/checkout@v2
9 | - uses: actions/setup-python@v2
10 | with:
11 | python-version: 3.9
12 | - run: pip install tox
13 | - run: make test-repo
14 | - name: Deploy to GitHub Pages
15 | uses: peaceiris/actions-gh-pages@v3
16 | with:
17 | github_token: ${{ secrets.GITHUB_TOKEN }}
18 | publish_dir: 'test-repo'
19 | destination_dir: 'test-repo'
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.py[co]
3 | /.coverage
4 | /.tox
5 | /build
6 | /dist
7 | /gh-pages
8 | /test-repo
9 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: ^vendor/
2 | repos:
3 | - repo: https://github.com/pre-commit/pre-commit-hooks
4 | rev: v5.0.0
5 | hooks:
6 | - id: check-docstring-first
7 | - id: check-json
8 | - id: check-yaml
9 | - id: debug-statements
10 | - id: end-of-file-fixer
11 | - id: name-tests-test
12 | - id: requirements-txt-fixer
13 | - id: trailing-whitespace
14 | - repo: https://github.com/pycqa/flake8
15 | rev: 7.2.0
16 | hooks:
17 | - id: flake8
18 | - repo: https://github.com/hhatto/autopep8
19 | rev: v2.3.2
20 | hooks:
21 | - id: autopep8
22 | - repo: https://github.com/asottile/reorder-python-imports
23 | rev: v3.14.0
24 | hooks:
25 | - id: reorder-python-imports
26 | args: [--py39-plus, --add-import, 'from __future__ import annotations']
27 | - repo: https://github.com/asottile/pyupgrade
28 | rev: v3.19.1
29 | hooks:
30 | - id: pyupgrade
31 | args: [--py39-plus]
32 | - repo: https://github.com/asottile/setup-cfg-fmt
33 | rev: v2.8.0
34 | hooks:
35 | - id: setup-cfg-fmt
36 | - repo: https://github.com/pre-commit/mirrors-mypy
37 | rev: v1.15.0
38 | hooks:
39 | - id: mypy
40 | additional_dependencies: [types-requests]
41 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2017 Chris Kuehl
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: minimal
2 | minimal: venv
3 |
4 | venv: setup.py requirements-dev.txt Makefile tox.ini
5 | tox devenv $@
6 |
7 | .PHONY: test
8 | test: venv
9 | venv/bin/coverage erase
10 | venv/bin/coverage run -m pytest -v tests
11 | venv/bin/coverage report --show-missing --fail-under 100
12 | venv/bin/pre-commit install -f --install-hooks
13 | venv/bin/pre-commit run --all-files
14 |
15 | .PHONY: release
16 | release: venv
17 | venv/bin/python setup.py sdist bdist_wheel
18 | venv/bin/twine upload --skip-existing dist/*
19 |
20 | .PHONY: test-repo
21 | test-repo: venv
22 | venv/bin/python -m dumb_pypi.main \
23 | --package-list-json testing/package-list-json \
24 | --packages-url http://just.an.example/ \
25 | --output-dir test-repo \
26 | --logo https://i.fluffy.cc/tZRP1V8hdKCdrRQG5fBCv74M0VpcPLjP.svg \
27 | --logo-width 42
28 |
--------------------------------------------------------------------------------
/RATIONALE.md:
--------------------------------------------------------------------------------
1 | # Design rationale
2 |
3 | This document contains various bits of information discovered while
4 | implementing dumb-pypi, and explains why certain decisions were made.
5 |
6 | [PEP 503](https://www.python.org/dev/peps/pep-0503/) is the canonical reference
7 | for the PyPI "simple" API, but it is not complete (if you follow it fully, old
8 | clients cannot use your PyPI server).
9 |
10 |
11 | ## Summary of PyPI client behaviors
12 |
13 | The primary difference between different versions of pip is that newer versions
14 | do progressively more normalizing of package names in the initial request.
15 |
16 | PEP 503 states that clients must not rely on the PyPI server to redirect
17 | requests from an unnormalized name to a normalized one, but unfortunately this
18 | is not the world we live in. If you need to support older versions of pip, your
19 | PyPI server must be able to accept requests for unnormalized package names and
20 | redirect or serve them.
21 |
22 |
23 | ### pip >= 8.1.2
24 |
25 | Full normalization of package names is done before making a request to PyPI.
26 |
27 | * `pip install ocflib` => `/ocflib`
28 | * `pip install aspy.yaml` => `/aspy-yaml`
29 | * `pip install ASPY.YAML` => `/aspy-yaml`
30 |
31 | (Yes, this behavior was introduced in a *patch release* to the 8.1.x series.)
32 |
33 | Note that even with the latest pip versions, normalization is not fully applied
34 | to non-wheel links. So you might get to the right listing, but won't find the
35 | archive. For example, `aspy.yaml` has these links (files) on public PyPI:
36 |
37 | * aspy.yaml-0.2.0.tar.gz
38 | * aspy.yaml-0.2.1.tar.gz
39 | * aspy.yaml-0.2.2-py2.py3-none-any.whl
40 |
41 | You can pip install `aspy.yaml==0.2.1` but not `aspy-yaml==0.2.1`, but you
42 | *can* install `aspy-yaml==0.2.2` (wheel names are treated differently). The
43 | same thing does *not* happen with capitalization (you can install
44 | `ASPY.YAML==0.2.1`).
45 |
46 |
47 | ### 6 <= pip <= 8.1.1
48 |
49 | Some normalization is done (e.g. capitalization) but not all (e.g. dots not
50 | transformed to dashes).
51 |
52 | * `pip install ocflib` => `/ocflib`
53 | * `pip install aspy.yaml` => `/aspy.yaml`
54 | * `pip install ASPY.YAML` => `/aspy.yaml`
55 |
56 |
57 | ### pip < 6, easy_install
58 |
59 | No normalization is done.
60 |
61 | * `pip install ocflib` => `/ocflib`
62 | * `pip install aspy.yaml` => `/aspy.yaml`
63 | * `pip install ASPY.YAML` => `/ASPY.YAML`
64 |
65 |
66 | ## Package name normalization
67 |
68 | PEP 503 defines it like this:
69 |
70 | ```python
71 | def normalize(name):
72 | return re.sub(r'[-_.]+', '-', name).lower()
73 | ```
74 |
75 | Unfortunately this means you'll need to regex sub incoming requests, which is a
76 | bit more than the standard rewrite engines in Apache or nginx can accomplish,
77 | but it can still be accomplished pretty easily. The README contains an example
78 | of how you could do it with nginx.
79 |
80 |
81 | ## "api-version" meta attribute
82 |
83 | Old versions of pip (like 6.0.0) have extra restrictions when using a meta tag
84 | like `<meta name="api-version" value="2" />`. Newer versions (at least `>= 8`,
85 | possibly earlier) do not enforce these.
86 |
87 | Some example restrictions:
88 |
89 | * Links must have `rel="internal"`, even if you're using a relative URL or a
90 | URL to the same server, or pip refuses to download files unless you specify
91 | `--allow-external {packagename}`. This isn't a problem—we could do this.
92 |
93 | * Packages must have hashes at the end of their links. This is a bigger
94 | problem, because it means that in order to construct the index, we need to
95 | have the actual files on-hand, and hash them (which is prohibitively
96 | expensive to do during a full rebuild with tens of thousands of packages).
97 |
98 | This is an admittedly "nice-to-have" feature, but it significantly increases
99 | complexity. Hashing is too slow to do on-demand, so we'd need to somehow
100 | cache those, and then figure out when to invalidate them, and it gets too
101 | complicated quickly.
102 |
103 | For internal PyPI registries, this is an unnecessary feature, since you
104 | should be serving both the index and the packages from a trusted source over
105 | HTTPS, which already ensures integrity. The only real case that the hash is
106 | necessary is when you trust the index server but not the file host, which is
107 | not a scenario most people are concerned with.
108 |
109 | Because of the above, we do not set this meta attribute. This gains us
110 | compatibility with older versions of pip at no cost.
111 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | dumb-pypi
2 | ---------
3 |
4 | [](https://github.com/chriskuehl/dumb-pypi/actions/workflows/ci.yaml)
5 | [](https://pypi.python.org/pypi/dumb-pypi)
6 |
7 |
8 | `dumb-pypi` is a simple read-only PyPI index server generator, backed entirely
9 | by static files. It is ideal for internal use by organizations that have a
10 | bunch of their own packages which they'd like to make available.
11 |
12 | You can view [an example generated repo](https://chriskuehl.github.io/dumb-pypi/test-repo/).
13 |
14 |
15 | ## A rant about static files (and why you should use dumb-pypi)
16 |
17 | The main difference between dumb-pypi and other PyPI implementations is that
18 | dumb-pypi has *no server component*. It's just a script that, given a list of
19 | Python package names, generates a bunch of static files which you can serve
20 | from any webserver, or even directly from S3.
21 |
22 | There's something magical about being able to serve a package repository
23 | entirely from a tree of static files. It's incredibly easy to make it fast and
24 | highly-available when you don't need to worry about running a bunch of
25 | application servers (which are serving a bunch of read-only queries that could
26 | have just been pre-generated).
27 |
28 | Linux distributions have been doing this right for decades. Debian has a system
29 | of hundreds of mirrors, and the entire thing is powered entirely by some fancy
30 | `rsync` commands.
31 |
32 | For the maintainer of a PyPI repository, `dumb-pypi` has some nice properties:
33 |
34 | * **File serving is extremely fast.** nginx can serve your static files faster
35 | than you'd ever need. In practice, there are almost no limits on the number
36 | of packages or number of versions per package.
37 |
38 | * **It's very simple.** There's no complicated WSGI app to deploy, no
39 | databases, and no caches. You just need to run the script whenever you have
40 | new packages, and your index server is ready in seconds.
41 |
42 | For more about why this design was chosen, see the detailed
43 | [`RATIONALE.md`][rationale] in this repo.
44 |
45 |
46 | ## Usage
47 |
48 | To use dumb-pypi, you need two things:
49 |
50 | * A script which generates the index. (That's this project!)
51 |
52 | * A generic webserver to serve the generated index.
53 |
54 | This part is up to you. For example, you might sync the built index into an
55 | S3 bucket, and serve it directly from S3. You might run nginx from the built
56 | index locally.
57 |
58 | My recommended high-availability (but still quite simple) deployment is:
59 |
60 | * Store all of the packages in S3.
61 |
62 | * Have a cronjob (or equivalent) which rebuilds the index based on the packages
63 | in S3. This is incredibly fast—it would not be unreasonable to do it every
64 | sixty seconds. After building the index, sync it into a separate S3 bucket.
65 |
66 | * Have a webserver (or set of webservers behind a load balancer) running nginx
67 | (with the config provided below), with the source being that second S3
68 | bucket.
69 |
70 |
71 | ### Generating static files
72 |
73 | First, install `dumb-pypi` somewhere (e.g. into a virtualenv).
74 |
75 | By design, dumb-pypi does *not* require you to have the packages available when
76 | building the index. You only need a list of filenames, one per line. For
77 | example:
78 |
79 | ```
80 | dumb-init-1.1.2.tar.gz
81 | dumb_init-1.2.0-py2.py3-none-manylinux1_x86_64.whl
82 | ocflib-2016.10.31.0.40-py2.py3-none-any.whl
83 | pre_commit-0.9.2.tar.gz
84 | ```
85 |
86 | You should also know a URL to access these packages (if you serve them from the
87 | same host as the index, it can be a relative URL). For example, it might be
88 | `https://my-pypi-packages.s3.amazonaws.com/` or `../../pool/`.
89 |
90 | You can then invoke the script:
91 |
92 | ```bash
93 | $ dumb-pypi \
94 | --package-list my-packages \
95 | --packages-url https://my-pypi-packages.s3.amazonaws.com/ \
96 | --output-dir my-built-index
97 | ```
98 |
99 | The built index will be in `my-built-index`. It's now up to you to figure out
100 | how to serve that with a webserver (nginx is a good option — details below!).
101 |
102 |
103 | #### Additional options for packages
104 |
105 | You can extend the capabilities of your registry using the extended JSON input
106 | syntax when providing your package list to dumb-pypi. Instead of using the
107 | format listed above of one filename per line, format your file with one JSON
108 | object per line, like this:
109 |
110 | ```json
111 | {"filename": "dumb-init-1.1.2.tar.gz", "hash": "sha256=<hash>", "requires_python": ">=3.6", "uploaded_by": "ckuehl", "upload_timestamp": 1512539924, "yanked_reason": null, "core_metadata": "sha256=<hash>"}
112 | ```
113 |
114 | | Key | Required? | Description |
115 | | -------------------- | --------- | ----------- |
116 | | `filename` | Yes | Name of the file |
117 | | `hash` | No | Hash of the file in the format `<algo>=<hash>` |
118 | | `requires_python` | No | Python requirement string for the package ([PEP345](https://peps.python.org/pep-0345/#requires-python)) |
119 | | `core_metadata` | No | Either string `"true"` or a string in the format `<algo>=<hash>` to indicate metadata is available for this file by appending `.metadata` to the file URL ([PEP658](https://peps.python.org/pep-0658/), [PEP714](https://peps.python.org/pep-0714/)) |
120 | | `uploaded_by` | No | Freeform text to indicate an uploader of the package; only shown on web UI |
121 | | `upload_timestamp` | No | UNIX timestamp to indicate upload time of the package |
122 | | `yanked_reason` | No | Freeform text to indicate the package is yanked for the given reason ([PEP592](https://peps.python.org/pep-0592/)) |
123 | | `requires_dist` | No | _(Deprecated)_ Array of requires_dist dependencies ([PEP345](https://peps.python.org/pep-0345/#requires-python)), used only in the JSON API; consider using `core_metadata` instead |
124 |
125 | The `filename` key is required. All other keys are optional and will be used to
126 | provide additional information in your generated repository. This extended
127 | information can be useful to determine, for example, who uploaded a package.
128 | (Most of this information is useful in the web UI by humans, not by pip.)
129 |
130 | Where should you get information about the hash, uploader, etc? That's up to
131 | you—dumb-pypi isn't in the business of storing or calculating this data. If
132 | you're using S3, one easy option is to store it at upload time as [S3
133 | metadata][s3-metadata].
134 |
135 |
136 | #### Partial rebuild support
137 |
138 | If you want to avoid rebuilding your entire registry constantly, you can pass
139 | the `--previous-package-list` (or `--previous-package-list-json`) argument to
140 | dumb-pypi, pointing to the list you used the last time you called dumb-pypi.
141 | Only the files relating to changed packages will be rebuilt, saving you time
142 | and unnecessary I/O.
143 |
144 | The previous package list json is available in the output as `packages.json`.
145 |
146 |
147 | ### Recommended nginx config
148 |
149 | You can serve the packages from any static webserver (including directly from
150 | S3), but for compatibility with old versions of pip, it's necessary to do a
151 | tiny bit of URL rewriting (see [`RATIONALE.md`][rationale] for full details
152 | about the behavior of various pip versions).
153 |
154 | In particular, if you want to support old pip versions, you need to apply this
155 | logic to package names (taken from [PEP 503][pep503]):
156 |
157 | ```python
158 | def normalize(name):
159 | return re.sub(r'[-_.]+', '-', name).lower()
160 | ```
161 |
162 | Here is an example nginx config which supports all versions of pip and
163 | easy_install:
164 |
165 | ```nginx
166 | server {
167 | location / {
168 | root /path/to/index;
169 | set_by_lua $canonical_uri "return string.gsub(string.lower(ngx.var.uri), '[-_.]+', '-')";
170 | try_files $uri $uri/index.html $canonical_uri $canonical_uri/index.html =404;
171 | }
172 | }
173 |
174 | ```
175 |
176 | If you don't care about easy_install or versions of pip prior to 8.1.2, you can
177 | omit the `canonical_uri` hack.
178 |
179 |
180 | ### Using your deployed index server with pip
181 |
182 | When running pip, pass `-i https://my-pypi-server/simple` or set the
183 | environment variable `PIP_INDEX_URL=https://my-pypi-server/simple`.
184 |
185 |
186 | ### Known incompatibilities with public PyPI
187 |
188 | We try to maintain compatibility with the standard PyPI interface, but there
189 | are some incompatibilities currently which are hard to fix due to dumb-pypi's
190 | design:
191 |
192 | * While [both JSON API endpoints][json-api] are supported, many keys in the
193 | JSON API are not present since they require inspecting packages which
194 | dumb-pypi can't do. Some of these, like `requires_python` and
195 | `requires_dist`, can be passed in as JSON.
196 |
197 | * The [per-version JSON API endpoint][per-version-api] only includes data about
198 | the current requested version and not _all_ versions, unlike public PyPI. In
199 | other words, if you access `/pypi/<package>/1.0.0/json`, you will only see
200 | the `1.0.0` release under the `releases` key and not every release ever made.
201 | The regular non-versioned API route (`/pypi/<package>/json`) will have all
202 | releases.
203 |
204 |
205 | ## Contributing
206 |
207 | Thanks for contributing! To get started, run `make venv` and then `.
208 | venv/bin/activate` to source the virtualenv. You should now have a `dumb-pypi`
209 | command on your path using your checked-out version of the code.
210 |
211 | To run the tests, call `make test`. To run an individual test, you can do
212 | `pytest -k name_of_test tests` (with the virtualenv activated).
213 |
214 |
215 | [rationale]: https://github.com/chriskuehl/dumb-pypi/blob/master/RATIONALE.md
216 | [pep503]: https://www.python.org/dev/peps/pep-0503/#normalized-names
217 | [s3-metadata]: https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#UserMetadata
218 | [json-api]: https://warehouse.pypa.io/api-reference/json.html
219 | [per-version-api]: https://warehouse.pypa.io/api-reference/json.html#get--pypi--project_name---version--json
220 |
--------------------------------------------------------------------------------
/dumb_pypi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chriskuehl/dumb-pypi/2210b7456715b4691349043bf2a958c5c8ffdb64/dumb_pypi/__init__.py
--------------------------------------------------------------------------------
/dumb_pypi/main.py:
--------------------------------------------------------------------------------
1 | """A simple read-only PyPI static index server generator.
2 |
3 | To generate the registry, pass a list of packages using either --package-list
4 | or --package-list-json.
5 |
6 | By default, the entire registry is rebuilt. If you want to do a rebuild of
7 | changed packages only, you can pass --previous-package-list(-json) with the old
8 | package list.
9 | """
10 | from __future__ import annotations
11 |
12 | import argparse
13 | import collections
14 | import contextlib
15 | import inspect
16 | import itertools
17 | import json
18 | import math
19 | import os.path
20 | import re
21 | import sys
22 | import tempfile
23 | from collections.abc import Generator
24 | from collections.abc import Iterator
25 | from collections.abc import Sequence
26 | from datetime import datetime
27 | from typing import Any
28 | from typing import IO
29 | from typing import NamedTuple
30 |
31 | import jinja2
32 | import packaging.utils
33 | import packaging.version
34 |
CHANGELOG_ENTRIES_PER_PAGE = 5000
# Splits a string around runs of ASCII digits. The capturing group keeps the
# digit runs in the split result so they can be compared numerically.
DIGIT_RE = re.compile('([0-9]+)', re.ASCII)
# Copied from distlib/wheel.py
# Named groups: nm=name, vn=version, bn=build number (optional),
# py=python tag, bi=ABI tag, ar=platform tag.
WHEEL_FILENAME_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?
-(?P<py>\w+\d+(\.\w+\d+)*)
-(?P<bi>\w+)
-(?P<ar>\w+(\.\w+)*)
\.whl$
''', re.IGNORECASE | re.VERBOSE)
47 |
48 |
def remove_extension(name: str) -> str:
    """Strip the archive extension from a filename.

    Names ending in ``gz`` or ``bz2`` lose their last two dot-separated
    suffixes (e.g. ``.tar.gz``); every other name loses only the last one.
    Raises ValueError if there are fewer dots than suffixes to strip.
    """
    suffix_count = 2 if name.endswith(('gz', 'bz2')) else 1
    stem = name
    for _ in range(suffix_count):
        stem, _dropped = stem.rsplit('.', 1)
    return stem
54 |
55 |
def guess_name_version_from_filename(
        filename: str,
) -> tuple[str, str | None]:
    """Guess the (name, version) pair encoded in a distribution filename.

    Wheels are parsed strictly with WHEEL_FILENAME_RE. Any other filename is
    parsed on a best-effort basis after its extension is stripped; the version
    is None when no version can be identified.

    Raises ValueError when the filename is not a valid wheel name, or when the
    guessed name or version would be empty.
    """
    if filename.endswith('.whl'):
        # TODO: Switch to packaging.utils.parse_wheel_filename which enforces
        # PEP440 versions for wheels.
        m = WHEEL_FILENAME_RE.match(filename)
        if m is None:
            raise ValueError(f'Invalid package name: {filename}')
        return m.group('nm'), m.group('vn')

    # These don't have a well-defined format like wheels do, so they are
    # sort of "best effort", with lots of tests to back them up.
    # The most important thing is to correctly parse the name.
    name = remove_extension(filename)
    version = None

    if '-' in name:
        if name.count('-') == 1:
            name, version = name.split('-')
        else:
            parts = name.split('-')
            # Scan right-to-left without breaking: the last assignment wins,
            # so the version starts at the left-most version-looking part.
            for i in range(len(parts) - 1, 0, -1):
                part = parts[i]
                if '.' in part and re.search('[0-9]', part):
                    name, version = '-'.join(parts[0:i]), '-'.join(parts[i:])

    # possible with poorly-named files
    if len(name) <= 0:
        raise ValueError(f'Invalid package name: {filename}')

    # Also possible with poorly-named files (e.g. "foo-.tar.gz"). Raise the
    # same error as for other bad names instead of relying on an assert,
    # which is stripped under -O and is not a ValueError.
    if version is not None and len(version) <= 0:
        raise ValueError(f'Invalid package name: {filename}')

    return name, version
92 |
93 |
def _natural_key(s: str) -> tuple[int | str, ...]:
    """Build a natural-sort key: digit runs become ints, the rest stay str."""
    key: list[int | str] = []
    for chunk in DIGIT_RE.split(s):
        key.append(int(chunk) if chunk.isdigit() else chunk)
    return tuple(key)
99 |
100 |
class Package(NamedTuple):
    """One distribution file in the index, plus its optional metadata.

    Build instances with `Package.create`, which validates the filename and
    derives `name`, `version` and `parsed_version` from it.
    """
    filename: str
    # Canonicalized project name guessed from the filename.
    name: str
    # Version string guessed from the filename; None if it can't be guessed.
    version: str | None
    # Parsed form of `version` ('0' is parsed when version is None).
    parsed_version: packaging.version.Version
    # Digest in the form "<algo>=<digest>", or None.
    hash: str | None
    requires_dist: tuple[str, ...] | None
    requires_python: str | None
    core_metadata: str | None
    # UNIX timestamp of the upload, or None.
    upload_timestamp: int | None
    uploaded_by: str | None
    yanked_reason: str | None

    def __lt__(self, other: tuple[Any, ...]) -> bool:
        """Order packages by `sort_key`; only other Packages are comparable."""
        assert isinstance(other, Package), type(other)
        return self.sort_key < other.sort_key

    @property
    def sort_key(self) -> tuple[str, packaging.version.Version, bool, tuple[str | int, ...], str]:
        """Sort key for a filename."""
        return (
            self.name,
            self.parsed_version,
            # sort wheels first
            not self.filename.endswith('.whl'),
            # natural sort within
            _natural_key(self.filename),
            # all things equal, use filename
            self.filename,
        )

    @property
    def formatted_upload_time(self) -> str:
        """The upload timestamp rendered as a UTC date-time string.

        Must only be used when `upload_timestamp` is not None.
        """
        assert self.upload_timestamp is not None
        dt = datetime.utcfromtimestamp(self.upload_timestamp)
        return _format_datetime(dt)

    @property
    def info_string(self) -> str:
        """Human-readable summary: version, then upload time and uploader."""
        # TODO: I'd like to remove this "info string" and instead format things
        # nicely for humans (e.g. in a table or something).
        #
        # This might mean changing the web interface to use different pages for
        # humans than the /simple/ ones it currently links to. (Even if pip can
        # parse links from a <table>, it might add significantly more bytes.)
        info = self.version or 'unknown version'
        if self.upload_timestamp is not None:
            info += f', {self.formatted_upload_time}'
        if self.uploaded_by is not None:
            info += f', {self.uploaded_by}'
        return info

    def url(self, base_url: str, *, include_hash: bool = True) -> str:
        """Return the file's URL under `base_url`.

        The hash is appended as a `#<hash>` fragment when one is set and
        `include_hash` is true.
        """
        hash_part = f'#{self.hash}' if self.hash and include_hash else ''
        return f'{base_url.rstrip("/")}/{self.filename}{hash_part}'

    @property
    def packagetype(self) -> str:
        """Package type inferred from the filename extension."""
        if self.filename.endswith('.whl'):
            return 'bdist_wheel'
        elif self.filename.endswith('.egg'):
            return 'bdist_egg'
        else:
            return 'sdist'

    def json_info(self, base_url: str) -> dict[str, Any]:
        """Return this file's entry for the JSON API.

        `upload_time` and `digests` are only present when the corresponding
        metadata is available.
        """
        ret: dict[str, Any] = {
            'filename': self.filename,
            'url': self.url(base_url, include_hash=False),
            'requires_python': self.requires_python,
            'packagetype': self.packagetype,
            'yanked': bool(self.yanked_reason),
            'yanked_reason': self.yanked_reason,
        }
        if self.upload_timestamp is not None:
            ret['upload_time'] = self.formatted_upload_time
        if self.hash is not None:
            # Split on the first '=' only: the digest itself may contain '='
            # (e.g. base64 padding).
            algo, h = self.hash.split('=', 1)
            ret['digests'] = {algo: h}
        return ret

    def input_json(self) -> dict[str, Any]:
        """A dict suitable for json lines."""
        # The keyword-only arguments of `create` are exactly the keys accepted
        # as input, so the result can be fed back into `create`.
        return {
            k: getattr(self, k)
            for k in inspect.getfullargspec(self.create).kwonlyargs
            if getattr(self, k) is not None
        }

    @classmethod
    def create(
            cls,
            *,
            filename: str,
            hash: str | None = None,
            requires_dist: Sequence[str] | None = None,
            requires_python: str | None = None,
            upload_timestamp: int | None = None,
            uploaded_by: str | None = None,
            yanked_reason: str | None = None,
            core_metadata: str | None = None,
    ) -> Package:
        """Validate `filename` and build a Package from it and its metadata.

        Raises ValueError if the filename contains unsafe characters or `..`,
        or if a name/version cannot be guessed from it.
        """
        # fullmatch (rather than match + '$') so that a trailing newline is
        # rejected as well.
        if not re.fullmatch(r'[a-zA-Z0-9_\-\.\+]+', filename) or '..' in filename:
            raise ValueError(f'Unsafe package name: {filename}')

        name, version = guess_name_version_from_filename(filename)
        return cls(
            filename=filename,
            name=packaging.utils.canonicalize_name(name),
            version=version,
            parsed_version=packaging.version.parse(version or '0'),
            hash=hash,
            requires_dist=tuple(requires_dist) if requires_dist is not None else None,
            requires_python=requires_python,
            core_metadata=core_metadata,
            upload_timestamp=upload_timestamp,
            uploaded_by=uploaded_by,
            yanked_reason=yanked_reason,
        )
220 |
221 |
@contextlib.contextmanager
def atomic_write(path: str) -> Generator[IO[str]]:
    """Write to `path` atomically via a temporary sibling file.

    Yields a text file object for a hidden temporary file in the same
    directory as `path`. On success the temporary file replaces `path`; on any
    exception it is removed and the exception is re-raised, leaving `path`
    untouched.
    """
    # NOTE(review): mktemp only picks a name and does not create the file, so
    # it is race-prone. It is kept because the file is then created by a plain
    # open(); switching to mkstemp would presumably change the permissions of
    # the generated files -- confirm before changing.
    tmp = tempfile.mktemp(
        prefix='.' + os.path.basename(path),
        dir=os.path.dirname(path),
    )
    try:
        with open(tmp, 'w') as f:
            yield f
    except BaseException:
        # The temp file may never have been created (e.g. open() itself
        # failed); don't let the cleanup error mask the original exception.
        with contextlib.suppress(FileNotFoundError):
            os.remove(tmp)
        raise
    else:
        os.replace(tmp, path)
236 |
237 |
238 | def _format_datetime(dt: datetime) -> str:
239 | return dt.strftime('%Y-%m-%d %H:%M:%S')
240 |
241 |
# Package attributes counted when picking the file with the most populated
# metadata for the "info" section of the JSON API.
IMPORTANT_METADATA_FOR_INFO = frozenset({
    'name',
    'version',
    'requires_dist',
    'requires_python',
})
248 |
249 |
250 | def _package_json(sorted_files: list[Package], base_url: str) -> dict[str, Any]:
251 | # https://warehouse.pypa.io/api-reference/json.html
252 | # note: the full api contains much more, we only output the info we have
253 | by_version: dict[str, list[Package]] = collections.defaultdict(list)
254 | for file in sorted_files:
255 | if file.version is not None:
256 | by_version[file.version].append(file)
257 |
258 | # Find a file from the latest release to use for "info". We don't want to
259 | # mix-and-match the metadata across releases since tools like Poetry rely
260 | # on this, but we do want to pick the file in the release with the most
261 | # populated metadata.
262 | latest_file = sorted_files[-1]
263 | if sorted_files[-1].version is not None:
264 | latest_file = max(
265 | by_version[sorted_files[-1].version],
266 | key=lambda f: sum(bool(getattr(f, v)) for v in IMPORTANT_METADATA_FOR_INFO),
267 | )
268 |
269 | return {
270 | 'info': {
271 | 'name': latest_file.name,
272 | 'version': latest_file.version,
273 | 'requires_dist': latest_file.requires_dist,
274 | 'requires_python': latest_file.requires_python,
275 | 'platform': "UNKNOWN",
276 | 'summary': None,
277 | 'yanked': bool(latest_file.yanked_reason),
278 | 'yanked_reason': latest_file.yanked_reason,
279 | },
280 | 'releases': {
281 | version: [file_.json_info(base_url) for file_ in files]
282 | for version, files in by_version.items()
283 | },
284 | 'urls': [
285 | file_.json_info(base_url)
286 | for file_ in by_version[latest_file.version]
287 | ] if latest_file and latest_file.version is not None else [],
288 | }
289 |
290 |
class Settings(NamedTuple):
    """Options controlling how the static repository is generated."""
    # Directory under which the `simple` and `pypi` trees are written.
    output_dir: str
    # Base URL of the package files; used to build file links.
    packages_url: str
    # Exposed to the templates as the `title` global.
    title: str
    # Exposed to the templates as the `logo` global.
    logo: str
    # Exposed to the templates as the `logo_width` global.
    logo_width: int
    # Passed to the templates as `generate_timestamp`.
    generate_timestamp: bool
    # When true, the per-version /pypi/{package}/{version}/json files are
    # not generated.
    disable_per_release_json: bool
299 |
300 |
def build_repo(
        packages: dict[str, set[Package]],
        previous_packages: dict[str, set[Package]] | None,
        settings: Settings,
) -> None:
    """Write the static repository for ``packages`` under ``settings.output_dir``.

    The following outputs are produced:

    * ``simple/index.html`` and ``simple/{package}/index.html``
    * ``pypi/{package}/json`` and, unless
      ``settings.disable_per_release_json`` is set,
      ``pypi/{package}/{version}/json``
    * ``changelog/page{N}.html``
    * ``index.html``
    * ``packages.json`` (one JSON object per line)

    Args:
        packages: Mapping of package name to the set of files for it.
        previous_packages: The mapping used for the previous build, or
            ``None``.  When given, only outputs affected by a difference
            between the two mappings are regenerated; when the mappings are
            equal nothing is written at all.
        settings: Build options.
    """
    # Short circuit if nothing changed at all.  This is checked before any
    # other work so an unchanged package list costs nothing.
    if packages == previous_packages:
        return

    simple = os.path.join(settings.output_dir, 'simple')
    pypi = os.path.join(settings.output_dir, 'pypi')
    current_date = _format_datetime(datetime.utcnow())

    jinja_env = jinja2.Environment(
        loader=jinja2.PackageLoader('dumb_pypi', 'templates'),
        autoescape=True,
    )
    jinja_env.globals['title'] = settings.title
    jinja_env.globals['packages_url'] = settings.packages_url
    jinja_env.globals['logo'] = settings.logo
    jinja_env.globals['logo_width'] = settings.logo_width

    # Sorting package versions is actually pretty expensive, so we do it once
    # at the start.
    sorted_packages = {name: sorted(files) for name, files in packages.items()}

    # /simple/index.html
    # Rebuild if there are different package names.
    if previous_packages is None or set(packages) != set(previous_packages):
        os.makedirs(simple, exist_ok=True)
        with atomic_write(os.path.join(simple, 'index.html')) as f:
            f.write(jinja_env.get_template('simple.html').render(
                date=current_date,
                generate_timestamp=settings.generate_timestamp,
                package_names=sorted(sorted_packages),
            ))

    for package_name, sorted_files in sorted_packages.items():
        # Rebuild only if the files are different for this package.  `.get()`
        # is used so that a package absent from the previous build counts as
        # changed instead of raising KeyError (plain dict) or inserting an
        # empty entry into the caller's mapping (defaultdict).
        if (
                previous_packages is not None and
                previous_packages.get(package_name) == packages[package_name]
        ):
            continue

        latest_version = sorted_files[-1].version

        # /simple/{package}/index.html
        simple_package_dir = os.path.join(simple, package_name)
        os.makedirs(simple_package_dir, exist_ok=True)
        with atomic_write(os.path.join(simple_package_dir, 'index.html')) as f:
            f.write(jinja_env.get_template('package.html').render(
                date=current_date,
                generate_timestamp=settings.generate_timestamp,
                package_name=package_name,
                files=sorted_files,
                packages_url=settings.packages_url,
                requirement=f'{package_name}=={latest_version}' if latest_version else package_name,
            ))

        # /pypi/{package}/json
        pypi_package_dir = os.path.join(pypi, package_name)
        os.makedirs(pypi_package_dir, exist_ok=True)
        with atomic_write(os.path.join(pypi_package_dir, 'json')) as f:
            json.dump(_package_json(sorted_files, settings.packages_url), f)

        # /pypi/{package}/{version}/json
        if not settings.disable_per_release_json:
            # TODO: Consider making this only generate JSON for the changed versions.
            version_to_files = collections.defaultdict(list)
            for file_ in sorted_files:
                version_to_files[file_.version].append(file_)
            for version, files in version_to_files.items():
                # Files without a version have no per-release directory.
                if version is None:
                    continue
                version_dir = os.path.join(pypi_package_dir, version)
                os.makedirs(version_dir, exist_ok=True)
                with atomic_write(os.path.join(version_dir, 'json')) as f:
                    json.dump(_package_json(files, settings.packages_url), f)

    # /changelog
    # Always rebuild (we would have short circuited already if nothing changed).
    changelog = os.path.join(settings.output_dir, 'changelog')
    os.makedirs(changelog, exist_ok=True)
    # Newest upload first; files without a timestamp sort as timestamp 0 and
    # ties are broken by the files' own ordering.
    files_newest_first = sorted(
        itertools.chain.from_iterable(packages.values()),
        key=lambda package: (-(package.upload_timestamp or 0), package),
    )
    page_count = math.ceil(len(files_newest_first) / CHANGELOG_ENTRIES_PER_PAGE)
    for page_idx, start_idx in enumerate(range(0, len(files_newest_first), CHANGELOG_ENTRIES_PER_PAGE)):
        chunk = files_newest_first[start_idx:start_idx + CHANGELOG_ENTRIES_PER_PAGE]
        page_number = page_idx + 1
        with atomic_write(os.path.join(changelog, f'page{page_number}.html')) as f:
            # A pagination link is None when it would point at the page
            # currently being rendered.
            pagination_first = "page1.html" if page_number != 1 else None
            pagination_last = f"page{page_count}.html" if page_number != page_count else None
            pagination_prev = f"page{page_number - 1}.html" if page_number != 1 else None
            pagination_next = f"page{page_number + 1}.html" if page_number != page_count else None
            f.write(jinja_env.get_template('changelog.html').render(
                files_newest_first=chunk,
                page_number=page_number,
                page_count=page_count,
                pagination_first=pagination_first,
                pagination_last=pagination_last,
                pagination_prev=pagination_prev,
                pagination_next=pagination_next,
            ))

    # /index.html
    # Always rebuild (we would have short circuited already if nothing changed).
    with atomic_write(os.path.join(settings.output_dir, 'index.html')) as f:
        f.write(jinja_env.get_template('index.html').render(
            packages=sorted(
                (
                    package,
                    sorted_versions[-1].version,
                )
                for package, sorted_versions in sorted_packages.items()
            ),
        ))

    # /packages.json
    # Always rebuild (we would have short circuited already if nothing changed).
    with atomic_write(os.path.join(settings.output_dir, 'packages.json')) as f:
        for package in itertools.chain.from_iterable(sorted_packages.values()):
            f.write(f'{json.dumps(package.input_json())}\n')
421 |
422 |
def _lines_from_path(path: str) -> list[str]:
    """Return the lines of the file at ``path`` without line terminators.

    A ``path`` of ``'-'`` reads from standard input instead of a file.
    """
    if path == '-':
        # stdin is owned by the process, so it is read but left open.
        return sys.stdin.read().splitlines()
    # Close the file deterministically rather than leaking the handle.
    with open(path) as f:
        return f.read().splitlines()
426 |
427 |
def _create_packages(
        package_infos: Iterator[dict[str, Any]],
) -> dict[str, set[Package]]:
    """Group the packages described by ``package_infos`` by package name.

    Each info dict is passed as keyword arguments to ``Package.create``.
    Entries for which that call raises ``ValueError`` are reported on
    stderr and left out of the result.
    """
    by_name: dict[str, set[Package]] = collections.defaultdict(set)
    for info in package_infos:
        try:
            pkg = Package.create(**info)
        except ValueError as err:
            # TODO: this should really be optional; i'd prefer it to fail hard
            print(f'{err} (skipping package)', file=sys.stderr)
            continue
        by_name[pkg.name].add(pkg)

    return by_name
442 |
443 |
def package_list(path: str) -> dict[str, set[Package]]:
    """Load packages from a plain list holding one filename per line.

    ``path`` is read via ``_lines_from_path``; every line is treated as the
    ``filename`` of one package.
    """
    filenames = _lines_from_path(path)
    infos = ({'filename': filename} for filename in filenames)
    return _create_packages(infos)
446 |
447 |
def package_list_json(path: str) -> dict[str, set[Package]]:
    """Load packages from a list holding one JSON object per line.

    ``path`` is read via ``_lines_from_path``; every line is decoded with
    ``json.loads`` and the resulting object describes one package.
    """
    lines = _lines_from_path(path)
    # Decoding stays lazy: each line is parsed only as it is consumed.
    return _create_packages(map(json.loads, lines))
450 |
451 |
def main(argv: Sequence[str] | None = None) -> int:
    """Parse command-line options and build the repository.

    Args:
        argv: Argument list to parse; ``None`` makes argparse fall back to
            the process arguments.

    Returns:
        ``0`` once the build has completed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # The package list options use the loader functions as their argparse
    # `type`, so the lists are read and parsed while arguments are parsed.
    # Both spellings store into the same destination; exactly one is required.
    package_input_group = parser.add_mutually_exclusive_group(required=True)
    package_input_group.add_argument(
        '--package-list',
        help='path to a list of packages (one per line)',
        type=package_list,
        dest='packages',
    )
    package_input_group.add_argument(
        '--package-list-json',
        help='path to a list of packages (one JSON object per line)',
        type=package_list_json,
        dest='packages',
    )

    # Optional previous package list, used for partial rebuilds.
    previous_package_input_group = parser.add_mutually_exclusive_group(required=False)
    previous_package_input_group.add_argument(
        '--previous-package-list',
        help='path to the previous list of packages (for partial rebuilds)',
        type=package_list,
        dest='previous_packages',
    )
    previous_package_input_group.add_argument(
        '--previous-package-list-json',
        help='path to the previous list of packages (for partial rebuilds)',
        type=package_list_json,
        dest='previous_packages',
    )

    parser.add_argument(
        '--output-dir', help='path to output to', required=True,
    )
    parser.add_argument(
        '--packages-url',
        help='url to packages (can be absolute or relative)', required=True,
    )
    parser.add_argument(
        '--title',
        help='site title (for web interface)', default='My Private PyPI',
    )
    parser.add_argument(
        '--logo',
        help='URL for logo to display (defaults to no logo)',
    )
    parser.add_argument(
        '--logo-width', type=int,
        help='width of logo to display', default=0,
    )
    parser.add_argument(
        '--no-generate-timestamp',
        action='store_false', dest='generate_timestamp',
        help=(
            "Don't template creation timestamp in outputs. This option makes "
            'the output repeatable.'
        ),
    )
    parser.add_argument(
        '--no-per-release-json',
        action='store_true',
        help=(
            'Disable per-release JSON API (/pypi/<package>/<version>/json).\n'
            'This may be useful for large repositories because this metadata can be '
            'a huge number of files for little benefit as almost no tools use it.'
        ),
    )
    args = parser.parse_args(argv)

    settings = Settings(
        output_dir=args.output_dir,
        packages_url=args.packages_url,
        title=args.title,
        logo=args.logo,
        logo_width=args.logo_width,
        generate_timestamp=args.generate_timestamp,
        disable_per_release_json=args.no_per_release_json,
    )
    build_repo(args.packages, args.previous_packages, settings)
    return 0
535 |
536 |
# Script entry point: exit the process with the status returned by main().
if __name__ == '__main__':
    sys.exit(main())
539 |
--------------------------------------------------------------------------------
/dumb_pypi/templates/_base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {% block title %}{% endblock %}
5 |
55 |
129 | {% block head_extra %}{% endblock %}
130 |
131 |
132 |
133 |