├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── ci
│   └── azure-pipelines.yml
├── requirements.txt
├── setup.cfg
├── setup.py
├── src
│   └── asgi_sitemaps
│       ├── __init__.py
│       ├── __version__.py
│       ├── _app.py
│       ├── _generation.py
│       ├── _models.py
│       ├── _types.py
│       └── py.typed
└── tests
    ├── __init__.py
    ├── test_integration.py
    └── test_unit.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Tooling.
2 | .coverage
3 | venv*/
4 |
5 | # Caches.
6 | __pycache__/
7 | *.pyc
8 | .mypy_cache/
9 | .pytest_cache/
10 |
11 | # Packaging.
12 | build/
13 | dist/
14 | *.egg-info/
15 |
16 | # Private.
17 | .env
18 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6 |
7 | ## 1.0 - 2022-02-13
8 |
9 | ### Added
10 |
11 | - _Now marked as Production/Stable software._ (Pull #14)
12 | - Add official support for Python 3.9 and Python 3.10. (Pull #13)
13 |
14 | ## 0.3.2 - 2020-07-07
15 |
16 | ### Fixed
17 |
18 | - Fix support for async items. (Pull #9)
19 |
20 | ## 0.3.1 - 2020-07-05
21 |
22 | ### Fixed
23 |
24 | - Fix `Scope` type hint: values are now `Any`.
25 |
26 | ## 0.3.0 - 2020-07-05
27 |
28 | This release changes the approach from "scrape the ASGI app to gather URLs" to a programmatic class-based API inspired by Django's sitemap framework.
29 |
30 | As such, the command line application does not exist anymore. Users are expected to define `Sitemap` classes, compose them into a `SitemapApp` endpoint, and add that to their ASGI app routing table.
31 |
32 | See the new `README.md` documentation for more information.
33 |
34 | ### Changed
35 |
36 | - Switch to a class-based dynamic endpoint API. (Pull #4)
37 |
38 | ## 0.2.0 - 2020-06-01
39 |
40 | ### Changed
41 |
42 | - Project was renamed from `sitemaps` to `asgi-sitemaps` - sitemap generation for ASGI apps. (Pull #2)
43 | - Change options of CLI and programmatic API to fit new "ASGI-only" project scope. (Pull #2)
44 | - CLI now reads from stdin (for `--check` mode) and outputs sitemap to stdout. (Pull #2)
45 |
46 | ### Removed
47 |
48 | - Drop support for crawling arbitrary remote servers. (Pull #2)
49 |
50 | ### Fixed
51 |
52 | - Don't include non-200 or non-HTML URLs in sitemap. (Pull #2)
53 |
54 | ## 0.1.0 - 2020-05-31
55 |
56 | ### Added
57 |
58 | - Initial implementation: CLI and programmatic async API.
59 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Florimond Manca
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | graft src
2 | include README.md
3 | include CHANGELOG.md
4 | include LICENSE
5 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | venv = venv
2 | bin = ${venv}/bin/
3 | pysources = src tests/
4 |
5 | build:
6 | ${bin}python setup.py sdist bdist_wheel
7 | ${bin}twine check dist/*
8 | rm -r build
9 |
10 | check:
11 | ${bin}black --check --diff --target-version=py37 ${pysources}
12 | ${bin}flake8 ${pysources}
13 | ${bin}mypy ${pysources}
14 | ${bin}isort --check --diff ${pysources}
15 |
16 | install:
17 | python3 -m venv ${venv}
18 | ${bin}pip install -U pip wheel
19 | ${bin}pip install -r requirements.txt
20 |
21 | format:
22 | ${bin}autoflake --in-place --recursive ${pysources}
23 | ${bin}isort ${pysources}
24 | ${bin}black --target-version=py37 ${pysources}
25 |
26 | publish:
27 | ${bin}twine upload dist/*
28 |
29 | test:
30 | ${bin}pytest
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # asgi-sitemaps
2 |
3 | [Build Status](https://dev.azure.com/florimondmanca/public/_build/latest?definitionId=11&branchName=master)
4 | [Coverage](https://codecov.io/gh/florimondmanca/asgi-sitemaps)
5 | 
6 | [Package version](https://pypi.org/project/asgi-sitemaps)
7 |
8 | [Sitemap](https://www.sitemaps.org) generation for ASGI applications. Inspired by [Django's sitemap framework](https://docs.djangoproject.com/en/3.0/ref/contrib/sitemaps/).
9 |
10 | **Contents**
11 |
12 | - [Features](#features)
13 | - [Installation](#installation)
14 | - [Quickstart](#quickstart)
15 | - [How-To](#how-to)
16 | - [Sitemap sections](#sitemap-sections)
17 | - [Dynamic generation from database queries](#dynamic-generation-from-database-queries)
18 | - [Advanced web framework integration](#advanced-web-framework-integration)
19 | - [API Reference](#api-reference)
20 | - [`Sitemap`](#class-sitemap)
21 | - [`SitemapApp`](#class-sitemapapp)
22 |
23 | ## Features
24 |
25 | - Build and compose sitemap sections into a single dynamic ASGI endpoint.
26 | - Supports drawing sitemap items from a variety of sources (static lists, (async) ORM queries, etc).
27 | - Compatible with any ASGI framework.
28 | - Fully type annotated.
29 | - 100% test coverage.
30 |
31 | ## Installation
32 |
33 | Install with pip:
34 |
35 | ```shell
36 | $ pip install 'asgi-sitemaps==1.*'
37 | ```
38 |
39 | `asgi-sitemaps` requires Python 3.7+.
40 |
41 | ## Quickstart
42 |
43 | Let's build a static sitemap for a "Hello, world!" application. The sitemap will contain a single URL entry for the home `/` endpoint.
44 |
45 | Here is the project file structure:
46 |
47 | ```console
48 | .
49 | └── server
50 |     ├── __init__.py
51 |     ├── app.py
52 |     └── sitemap.py
53 | ```
54 |
55 | First, declare a sitemap section by subclassing `Sitemap`, then wrap it in a `SitemapApp`:
56 |
57 | ```python
58 | # server/sitemap.py
59 | import asgi_sitemaps
60 |
61 | class Sitemap(asgi_sitemaps.Sitemap):
62 | def items(self):
63 | return ["/"]
64 |
65 | def location(self, item: str):
66 | return item
67 |
68 | def changefreq(self, item: str):
69 | return "monthly"
70 |
71 | sitemap = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io")
72 | ```
73 |
74 | Now, register the `sitemap` endpoint as a route onto your ASGI app. For example, if using Starlette:
75 |
76 | ```python
77 | # server/app.py
78 | from starlette.applications import Starlette
79 | from starlette.responses import PlainTextResponse
80 | from starlette.routing import Route
81 | from .sitemap import sitemap
82 |
83 | async def home(request):
84 | return PlainTextResponse("Hello, world!")
85 |
86 | routes = [
87 | Route("/", home),
88 | Route("/sitemap.xml", sitemap),
89 | ]
90 |
91 | app = Starlette(routes=routes)
92 | ```
93 |
94 | Serve the app using `$ uvicorn server.app:app`, then request the sitemap:
95 |
96 | ```bash
97 | curl http://localhost:8000/sitemap.xml
98 | ```
99 |
100 | ```xml
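101 | <?xml version="1.0" encoding="utf-8"?>
102 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
103 |     <url>
104 |         <loc>http://example.io/</loc>
105 |         <changefreq>monthly</changefreq>
106 |         <priority>0.5</priority>
107 |     </url>
108 | </urlset>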
109 | ```
110 |
111 | Tada!
112 |
113 | To learn more:
114 |
115 | - See [How-To](#how-to) for more advanced usage, including splitting the sitemap in multiple sections, and dynamically generating entries from database queries.
116 | - See the [`Sitemap` API reference](#class-sitemap) for all supported sitemap options.
117 |
118 | ## How-To
119 |
120 | ### Sitemap sections
121 |
122 | You can combine multiple sitemap classes into a single sitemap endpoint. This is useful to split the sitemap in multiple sections that may have different `items()` and/or sitemap attributes. Such sections could be static pages, blog posts, recent articles, etc.
123 |
124 | To do so, declare multiple sitemap classes, then pass them as a list to `SitemapApp`:
125 |
126 | ```python
127 | # server/sitemap.py
128 | import asgi_sitemaps
129 |
130 | class StaticSitemap(asgi_sitemaps.Sitemap):
131 | ...
132 |
133 | class BlogSitemap(asgi_sitemaps.Sitemap):
134 | ...
135 |
136 | sitemap = asgi_sitemaps.SitemapApp([StaticSitemap(), BlogSitemap()], domain="example.io")
137 | ```
138 |
139 | Entries from each sitemap will be concatenated when building the final `sitemap.xml`.
140 |
141 | ### Dynamic generation from database queries
142 |
143 | `Sitemap.items()` supports consuming any async iterable. This means you can easily integrate with an async database client or ORM so that `Sitemap.items()` fetches and returns relevant rows for generating your sitemap.
144 |
145 | Here's an example using [Databases](https://github.com/encode/databases), assuming you have a `Database` instance in `server/resources.py`:
146 |
147 | ```python
148 | # server/sitemap.py
149 | import asgi_sitemaps
150 | from .resources import database
151 |
152 | class Sitemap(asgi_sitemaps.Sitemap):
153 | async def items(self):
154 | query = "SELECT permalink, updated_at FROM articles;"
155 | return await database.fetch_all(query)
156 |
157 | def location(self, row: dict):
158 | return row["permalink"]
159 | ```
160 |
161 | ### Advanced web framework integration
162 |
163 | While `asgi-sitemaps` is framework-agnostic, you can use the [`.scope` attribute](#scope) available on `Sitemap` instances to feed the ASGI scope into your framework-specific APIs for inspecting and manipulating request information.
164 |
165 | Here is an example with [Starlette](https://www.starlette.io) where we build a sitemap of static pages. To decouple from the raw URL paths, pages are referred to by view name. We reverse-lookup their URLs by building a `Request` instance from the ASGI `.scope`, and using `.url_for()`:
166 |
167 | ```python
168 | # server/sitemap.py
169 | import asgi_sitemaps
170 | from starlette.datastructures import URL
171 | from starlette.requests import Request
172 |
173 | class StaticSitemap(asgi_sitemaps.Sitemap):
174 | def items(self):
175 | return ["home", "about", "blog:home"]
176 |
177 | def location(self, name: str):
178 | request = Request(scope=self.scope)
179 | url = request.url_for(name)
180 | return URL(url).path
181 | ```
182 |
183 | The corresponding Starlette routing table could look something like this:
184 |
185 | ```python
186 | # server/routes.py
187 | from starlette.routing import Mount, Route
188 | from . import views
189 | from .sitemap import sitemap
190 |
191 | routes = [
192 | Route("/", views.home, name="home"),
193 | Route("/about", views.about, name="about"),
194 | Route("/blog/", views.blog_home, name="blog:home"),
195 | Route("/sitemap.xml", sitemap),
196 | ]
197 | ```
198 |
199 | ## API Reference
200 |
201 | ### _class_ `Sitemap`
202 |
203 | Represents a source of sitemap entries.
204 |
205 | You can specify the type `T` of sitemap items for extra type safety:
206 |
207 | ```python
208 | import asgi_sitemaps
209 |
210 | class MySitemap(asgi_sitemaps.Sitemap[str]):
211 | ...
212 | ```
213 |
214 | #### _async_ `items`
215 |
216 | Signature: `async def () -> Union[Iterable[T], AsyncIterable[T]]`
217 |
218 | _(**Required**)_ Return an [iterable](https://docs.python.org/3/glossary.html#term-iterable) or an [asynchronous iterable](https://docs.python.org/3/glossary.html#term-asynchronous-iterable) of items of the same type. Each item will be passed as-is to `.location()`, `.lastmod()`, `.changefreq()`, and `.priority()`.
219 |
220 | Examples:
221 |
222 | ```python
223 | # Simplest usage: return a list
224 | def items(self) -> List[str]:
225 | return ["/", "/contact"]
226 |
227 | # Async operations are also supported
228 | async def items(self) -> List[dict]:
229 | query = "SELECT permalink, updated_at FROM pages;"
230 | return await database.fetch_all(query)
231 |
232 | # Sync and async generators are also supported
233 | async def items(self) -> AsyncIterator[dict]:
234 | query = "SELECT permalink, updated_at FROM pages;"
235 | async for row in database.aiter_rows(query):
236 | yield row
237 | ```
238 |
239 | #### `location`
240 |
241 | Signature: `def (item: T) -> str`
242 |
243 | _(**Required**)_ Return the absolute path of a sitemap item.
244 |
245 | "Absolute path" means a URL path without a protocol or domain. For example: `/blog/my-article`. (So `https://mydomain.com/blog/my-article` is not a valid location, nor is `mydomain.com/blog/my-article`.)
246 |
247 | #### `lastmod`
248 |
249 | Signature: `def (item: T) -> Optional[datetime.datetime]`
250 |
251 | _(Optional)_ Return the [date of last modification](https://www.sitemaps.org/protocol.html#lastmoddef) of a sitemap item as a [`datetime`](https://docs.python.org/3/library/datetime.html#datetime.datetime) object, or `None` (the default) for no `lastmod` field.
252 |
253 | #### `changefreq`
254 |
255 | Signature: `def (item: T) -> Optional[str]`
256 |
257 | _(Optional)_ Return the [change frequency](https://www.sitemaps.org/protocol.html#changefreqdef) of a sitemap item.
258 |
259 | Possible values are:
260 |
261 | - `None` - No `changefreq` field (the default).
262 | - `"always"`
263 | - `"hourly"`
264 | - `"daily"`
265 | - `"weekly"`
266 | - `"monthly"`
267 | - `"yearly"`
268 | - `"never"`
269 |
270 | #### `priority`
271 |
272 | Signature: `def (item: T) -> float`
273 |
274 | _(Optional)_ Return the [priority](https://www.sitemaps.org/protocol.html#prioritydef) of a sitemap item. Must be between 0 and 1. Defaults to `0.5`.
275 |
276 | #### `protocol`
277 |
278 | Type: `str`
279 |
280 | _(Optional)_ This attribute defines the protocol used to build URLs of the sitemap.
281 |
282 | Possible values are:
283 |
284 | - `"auto"` - The protocol with which the sitemap was requested (the default).
285 | - `"http"`
286 | - `"https"`
287 |
288 | #### `scope`
289 |
290 | This property returns the [ASGI scope](https://asgi.readthedocs.io/en/latest/specs/www.html#connection-scope) of the current HTTP request.
291 |
292 | ### _class_ `SitemapApp`
293 |
294 | An ASGI application that responds to HTTP requests with the `sitemap.xml` contents of the sitemap.
295 |
296 | Parameters:
297 |
298 | - _(**Required**)_ `sitemaps` - A `Sitemap` object or a list of `Sitemap` objects, used to generate sitemap entries.
299 | - _(**Required**)_ `domain` - The domain to use when generating sitemap URLs.
300 |
301 | Examples:
302 |
303 | ```python
304 | sitemap = SitemapApp(Sitemap(), domain="mydomain.com")
305 | sitemap = SitemapApp([StaticSitemap(), BlogSitemap()], domain="mydomain.com")
306 | ```
307 |
308 | ## License
309 |
310 | MIT
311 |
--------------------------------------------------------------------------------
/ci/azure-pipelines.yml:
--------------------------------------------------------------------------------
1 | resources:
2 | repositories:
3 | - repository: templates
4 | type: github
5 | endpoint: github
6 | name: florimondmanca/azure-pipelines-templates
7 | ref: refs/tags/5.0
8 |
9 | trigger:
10 | - master
11 | - refs/tags/*
12 |
13 | pr:
14 | - master
15 |
16 | variables:
17 | - name: CI
18 | value: "true"
19 | - name: PIP_CACHE_DIR
20 | value: $(Pipeline.Workspace)/.cache/pip
21 | - group: pypi-credentials
22 |
23 | stages:
24 | - stage: test
25 | jobs:
26 | - template: job--python-check.yml@templates
27 | parameters:
28 | pythonVersion: "3.10"
29 |
30 | - template: job--python-test.yml@templates
31 | parameters:
32 | jobs:
33 | py37:
34 | py310:
35 | coverage: true
36 |
37 | - stage: publish
38 | condition: startsWith(variables['Build.SourceBranch'], 'refs/tags/')
39 | jobs:
40 | - template: job--python-publish.yml@templates
41 | parameters:
42 | pythonVersion: "3.10"
43 | token: $(pypiToken)
44 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 |
3 | # Build and publishing.
4 | twine
5 | wheel
6 |
7 | # Tooling and tests.
8 | autoflake
9 | black==22.1.0
10 | flake8==4.*
11 | httpx==0.22.*
12 | isort==5.*
13 | mypy
14 | pytest==7.*
15 | pytest-asyncio==0.18.*
16 | pytest-cov
17 | seed-isort-config
18 | starlette==0.18.*
19 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = W503, E203, B305
3 | max-line-length = 88
4 |
5 | [mypy]
6 | disallow_untyped_defs = True
7 | ignore_missing_imports = True
8 |
9 | [tool:isort]
10 | profile = black
11 | known_first_party = asgi_sitemaps,tests
12 | known_third_party = httpx,pytest,setuptools,starlette
13 |
14 | [tool:pytest]
15 | addopts =
16 | -rxXs
17 | --cov=src
18 | --cov=tests
19 | --cov-report=term-missing
20 | --cov-fail-under=100
21 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import re
2 | from pathlib import Path
3 |
4 | from setuptools import find_packages, setup
5 |
6 |
def get_version(package: str) -> str:
    """Return the version declared in ``src/<package>/__version__.py``.

    The file is searched for a ``__version__ = "<value>"`` assignment and
    the quoted value is returned.

    Raises:
        RuntimeError: if no such assignment is found in the file.
    """
    version_file = Path("src") / package / "__version__.py"
    # Explicit encoding so the result does not depend on the platform locale.
    source = version_file.read_text(encoding="utf8")
    match = re.search("__version__ = ['\"]([^'\"]+)['\"]", source)
    if match is None:
        # An explicit raise (unlike `assert`) still fires under `python -O`.
        raise RuntimeError(f"Unable to find __version__ in {version_file}")
    return match.group(1)
12 |
13 |
def get_long_description() -> str:
    """Return the README text followed by the changelog text.

    Both files are read as UTF-8 from the current working directory and
    joined with a blank line between them.
    """
    with open("README.md", encoding="utf8") as readme, open(
        "CHANGELOG.md", encoding="utf8"
    ) as changelog:
        sections = [readme.read(), changelog.read()]
    return "\n\n".join(sections)
18 |
19 |
# Package metadata and build configuration.
setup(
    name="asgi-sitemaps",
    # Read from src/asgi_sitemaps/__version__.py by get_version().
    version=get_version("asgi_sitemaps"),
    description="Sitemap generation for ASGI applications.",
    # README.md and CHANGELOG.md concatenated by get_long_description().
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    url="http://github.com/florimondmanca/asgi-sitemaps",
    author="Florimond Manca",
    author_email="florimond.manca@protonmail.com",
    # "src" layout: packages are discovered under, and rooted at, src/.
    packages=find_packages("src"),
    package_dir={"": "src"},
    include_package_data=True,
    zip_safe=False,
    # No runtime dependencies are declared.
    install_requires=[],
    python_requires=">=3.7",
    license="MIT",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Operating System :: OS Independent",
        "Framework :: AsyncIO",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ],
)
49 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/__init__.py:
--------------------------------------------------------------------------------
1 | from .__version__ import __version__
2 | from ._app import SitemapApp
3 | from ._models import Sitemap
4 |
# Names exported by `from asgi_sitemaps import *`.
__all__ = [
    "__version__",
    "Sitemap",
    "SitemapApp",
]
10 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/__version__.py:
--------------------------------------------------------------------------------
# NOTE: setup.py extracts this value with a regular expression, so keep the
# `__version__ = "<value>"` form on a single line.
__version__ = "1.0.0"
2 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/_app.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Sequence, Union
2 |
3 | from ._generation import generate_sitemap
4 | from ._models import Sitemap
5 | from ._types import Scope
6 |
7 |
class SitemapApp:
    """ASGI application that responds with the sitemap built from `Sitemap`s.

    Parameters:
        sitemaps: a single `Sitemap`, or a sequence of them.
        domain: the domain handed to the generator when building URLs.
    """

    def __init__(
        self, sitemaps: Union[Sitemap, Sequence[Sitemap]], *, domain: str
    ) -> None:
        # Accept the single-sitemap shorthand by wrapping it in a list.
        if isinstance(sitemaps, Sitemap):
            self._sitemaps: Sequence[Sitemap] = [sitemaps]
        else:
            self._sitemaps = sitemaps
        self._domain = domain

    async def __call__(self, scope: Scope, receive: Callable, send: Callable) -> None:
        """Handle one HTTP request: generate the sitemap and send it back."""
        assert scope["type"] == "http"

        body = await generate_sitemap(
            self._sitemaps, scope=scope, domain=self._domain
        )

        response_start = {
            "type": "http.response.start",
            "status": 200,
            "headers": [
                [b"content-type", b"application/xml"],
                [b"content-length", b"%d" % len(body)],
            ],
        }
        response_body = {"type": "http.response.body", "body": body}

        # The request event is read only once the sitemap has been generated.
        request_event = await receive()
        assert request_event["type"] == "http.request"
        await send(response_start)
        await send(response_body)
31 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/_generation.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | from typing import (
3 | AsyncIterable,
4 | AsyncIterator,
5 | Awaitable,
6 | Dict,
7 | Iterable,
8 | Sequence,
9 | cast,
10 | )
11 | from urllib.parse import urljoin, urlsplit
12 |
13 | from ._models import SCOPE_CTX_VAR, Sitemap
14 | from ._types import ItemsTypes, Scope, T
15 |
16 |
async def generate_sitemap(
    sitemaps: Sequence[Sitemap], *, scope: Scope, domain: str
) -> bytes:
    """Render the entries of all ``sitemaps`` as one UTF-8 encoded XML sitemap.

    Parameters:
        sitemaps: sitemap sections; their entries are concatenated in order.
        scope: ASGI scope of the current request. It is stored in
            ``SCOPE_CTX_VAR`` and forwarded to ``get_fields``.
        domain: domain used by ``get_fields`` to build each ``<loc>`` URL.

    Returns:
        The sitemap document as bytes, terminated by a newline.

    Raises:
        ValueError: propagated from ``get_fields`` when a location contains a
            scheme or a domain.
    """
    # Local import: keeps this function's new dependency self-contained.
    from xml.sax.saxutils import escape

    # Make the scope available to `Sitemap.scope` while sitemap methods run.
    SCOPE_CTX_VAR.set(scope)

    async def _lines() -> AsyncIterator[bytes]:
        yield b'<?xml version="1.0" encoding="utf-8"?>'
        yield b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'

        for sitemap in sitemaps:
            async for item in _ensure_async_iterator(sitemap.items()):
                yield 4 * b" " + b"<url>"

                fields = get_fields(sitemap, item, scope=scope, domain=domain)
                for name, value in fields.items():
                    # Entity-escape the value: a location such as
                    # "/search?a=1&b=2" would otherwise yield malformed XML.
                    element = f"<{name}>{escape(value)}</{name}>"
                    yield 8 * b" " + element.encode("utf-8")

                yield 4 * b" " + b"</url>"

        yield b"</urlset>"
        # Empty last line so the joined document ends with a newline.
        yield b""

    return b"\n".join([line async for line in _lines()])
40 |
41 |
async def _ensure_async_iterator(items: ItemsTypes[T]) -> AsyncIterator[T]:
    """Yield the elements of ``items`` whatever form it comes in.

    ``items`` may be an async iterable, an awaitable that resolves to an
    iterable, or a plain iterable; the three are checked in that order.
    """
    if hasattr(items, "__aiter__"):
        async for element in cast(AsyncIterable[T], items):
            yield element
        return

    if inspect.isawaitable(items):
        resolved = await cast(Awaitable[Iterable[T]], items)
    else:
        resolved = cast(Iterable[T], items)

    for element in resolved:
        yield element
55 |
56 |
def get_fields(
    sitemap: Sitemap[T], item: T, *, scope: Scope, domain: str
) -> Dict[str, str]:
    """Build the sitemap fields of ``item`` as an ordered name-to-text mapping.

    The mapping always contains ``loc`` and ``priority``; ``lastmod`` and
    ``changefreq`` are included only when the sitemap returns a value that is
    not ``None``.

    Raises:
        ValueError: if the location returned by the sitemap contains a scheme
            or a domain.
    """
    # "auto" means: reuse the scheme found in the ASGI scope.
    protocol = scope["scheme"] if sitemap.protocol == "auto" else sitemap.protocol

    location = sitemap.location(item)
    lastmod = sitemap.lastmod(item)
    changefreq = sitemap.changefreq(item)
    priority = sitemap.priority(item)

    parts = urlsplit(location)
    if parts.scheme or parts.netloc:
        raise ValueError(f"Location contains scheme or domain: {location}")

    # Insertion order is the order in which the fields are later emitted.
    fields = {"loc": urljoin(f"{protocol}://{domain}", location)}
    if lastmod is not None:
        fields["lastmod"] = lastmod.strftime("%Y-%m-%d")
    if changefreq is not None:
        fields["changefreq"] = changefreq
    fields["priority"] = str(priority)
    return fields
83 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/_models.py:
--------------------------------------------------------------------------------
1 | import contextvars
2 | import datetime as dt
3 | from typing import Generic, Optional
4 |
5 | from ._types import ItemsTypes, Scope, T
6 |
# Context variable holding an ASGI scope; read back by `Sitemap.scope`.
SCOPE_CTX_VAR = contextvars.ContextVar[Scope]("asgi_sitemaps.scope")
8 |
9 |
class Sitemap(Generic[T]):
    """Base class for a source of sitemap entries of item type ``T``.

    Subclasses must override `items()` and `location()`; the remaining
    methods provide defaults.
    """

    # One of "http", "https" or "auto" (checked in `__init__`).
    protocol = "auto"

    def __init__(self) -> None:
        allowed_protocols = ("http", "https", "auto")
        assert self.protocol in allowed_protocols

    def items(self) -> ItemsTypes:
        """Return the items of this sitemap. Must be overridden."""
        raise NotImplementedError  # pragma: no cover

    def location(self, item: T) -> str:
        """Return the location of ``item``. Must be overridden."""
        raise NotImplementedError  # pragma: no cover

    def lastmod(self, item: T) -> Optional[dt.datetime]:
        """Return the last modification date of ``item``; ``None`` by default."""
        return None

    def changefreq(self, item: T) -> Optional[str]:
        """Return the change frequency of ``item``; ``None`` by default."""
        return None

    def priority(self, item: T) -> float:
        """Return the priority of ``item``; ``0.5`` by default."""
        return 0.5

    @property
    def scope(self) -> Scope:
        """The scope currently stored in ``SCOPE_CTX_VAR``.

        Raises:
            RuntimeError: if no scope has been set in the current context.
        """
        try:
            current = SCOPE_CTX_VAR.get()
        except LookupError:  # pragma: no cover
            raise RuntimeError("scope accessed outside of an ASGI request")
        return current
37 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/_types.py:
--------------------------------------------------------------------------------
1 | from typing import Any, AsyncIterable, Awaitable, Dict, Iterable, TypeVar, Union
2 |
# Type of a single sitemap item.
T = TypeVar("T")
# Accepted shapes for a collection of items: a plain iterable, an awaitable
# resolving to an iterable, or an async iterable.
ItemsTypes = Union[Iterable[T], Awaitable[Iterable[T]], AsyncIterable[T]]
# ASGI scope: string keys mapped to arbitrary values.
Scope = Dict[str, Any]
6 |
--------------------------------------------------------------------------------
/src/asgi_sitemaps/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/florimondmanca/asgi-sitemaps/009b0f04ef4bca29bcfb8304411b27b9b2bda64e/src/asgi_sitemaps/py.typed
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/florimondmanca/asgi-sitemaps/009b0f04ef4bca29bcfb8304411b27b9b2bda64e/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | import datetime as dt
2 | from textwrap import dedent
3 | from typing import Any, AsyncIterator, List, Optional
4 |
5 | import httpx
6 | import pytest
7 | from starlette.applications import Starlette
8 | from starlette.datastructures import URL
9 | from starlette.requests import Request
10 | from starlette.routing import Route
11 |
12 | import asgi_sitemaps
13 |
14 |
@pytest.mark.asyncio
async def test_sitemap() -> None:
    """
    A basic example returns expected sitemap XML content.
    """

    class Sitemap(asgi_sitemaps.Sitemap[str]):
        def items(self) -> List[str]:
            return ["/", "/about"]

        def location(self, item: str) -> str:
            return item

    app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io")

    async with httpx.AsyncClient(app=app) as client:
        r = await client.get("http://testserver")

    # Full expected document: XML declaration, <urlset> root, one <url> per item.
    content = dedent(
        """
        <?xml version="1.0" encoding="utf-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>http://example.io/</loc>
                <priority>0.5</priority>
            </url>
            <url>
                <loc>http://example.io/about</loc>
                <priority>0.5</priority>
            </url>
        </urlset>
        """
    ).lstrip()

    assert r.status_code == 200
    assert r.text == content
    assert r.headers["content-type"] == "application/xml"
    assert r.headers["content-length"] == str(len(content))
53 |
54 |
@pytest.mark.asyncio
async def test_sitemap_fields() -> None:
    """
    Custom sitemap fields behave as expected.
    """

    class Sitemap(asgi_sitemaps.Sitemap[int]):
        protocol = "https"

        def items(self) -> List[int]:
            return list(range(3))

        def location(self, k: int) -> str:
            return f"/page{k + 1}"

        def lastmod(self, k: int) -> Optional[dt.datetime]:
            if k % 3 == 0:
                return dt.datetime(2020, 1, 1)
            elif k % 3 == 1:
                return None
            else:
                return dt.datetime(2018, 3, 14)

        def changefreq(self, k: int) -> Optional[str]:
            if k % 3 == 0:
                return "daily"
            elif k % 3 == 1:
                return "monthly"
            else:
                return None

        def priority(self, k: int) -> float:
            return 0.7

    app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io")

    async with httpx.AsyncClient(app=app) as client:
        r = await client.get("http://testserver")

    # Optional fields (<lastmod>, <changefreq>) appear only when not None.
    content = dedent(
        """
        <?xml version="1.0" encoding="utf-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>https://example.io/page1</loc>
                <lastmod>2020-01-01</lastmod>
                <changefreq>daily</changefreq>
                <priority>0.7</priority>
            </url>
            <url>
                <loc>https://example.io/page2</loc>
                <changefreq>monthly</changefreq>
                <priority>0.7</priority>
            </url>
            <url>
                <loc>https://example.io/page3</loc>
                <lastmod>2018-03-14</lastmod>
                <priority>0.7</priority>
            </url>
        </urlset>
        """
    ).lstrip()

    assert r.status_code == 200
    assert r.text == content
    assert r.headers["content-type"] == "application/xml"
    assert r.headers["content-length"] == str(len(content))
122 |
123 |
@pytest.mark.asyncio
async def test_sitemap_async_items() -> None:
    """
    `.items()` supports returning an awaitable of items.
    """

    class Sitemap(asgi_sitemaps.Sitemap[str]):
        async def items(self) -> List[str]:
            return ["/", "/about"]

        def location(self, item: str) -> str:
            return item

    app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io")

    async with httpx.AsyncClient(app=app) as client:
        r = await client.get("http://testserver")

    # Full expected document: XML declaration, <urlset> root, one <url> per item.
    content = dedent(
        """
        <?xml version="1.0" encoding="utf-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>http://example.io/</loc>
                <priority>0.5</priority>
            </url>
            <url>
                <loc>http://example.io/about</loc>
                <priority>0.5</priority>
            </url>
        </urlset>
        """
    ).lstrip()

    assert r.status_code == 200
    assert r.text == content
    assert r.headers["content-type"] == "application/xml"
    assert r.headers["content-length"] == str(len(content))
162 |
163 |
@pytest.mark.asyncio
async def test_sitemap_async_iterable_items() -> None:
    """
    `.items()` supports returning async iterables.
    """

    class Sitemap(asgi_sitemaps.Sitemap[str]):
        async def items(self) -> AsyncIterator[str]:
            for item in ["/", "/about"]:
                yield item

        def location(self, item: str) -> str:
            return item

    app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io")

    async with httpx.AsyncClient(app=app) as client:
        r = await client.get("http://testserver")

    # Full expected document: XML declaration, <urlset> root, one <url> per item.
    content = dedent(
        """
        <?xml version="1.0" encoding="utf-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>http://example.io/</loc>
                <priority>0.5</priority>
            </url>
            <url>
                <loc>http://example.io/about</loc>
                <priority>0.5</priority>
            </url>
        </urlset>
        """
    ).lstrip()

    assert r.status_code == 200
    assert r.text == content
    assert r.headers["content-type"] == "application/xml"
    assert r.headers["content-length"] == str(len(content))
203 |
204 |
@pytest.mark.asyncio
async def test_sitemap_sections() -> None:
    """
    Multiple sitemap sections can be provided.
    """

    class StaticSitemap(asgi_sitemaps.Sitemap[str]):
        def items(self) -> List[str]:
            return ["/", "/about"]

        def location(self, item: str) -> str:
            return item

    class BlogSitemap(asgi_sitemaps.Sitemap[str]):
        def items(self) -> List[str]:
            return ["/blog/articles/"]

        def location(self, item: str) -> str:
            return item

    app = asgi_sitemaps.SitemapApp(
        [StaticSitemap(), BlogSitemap()], domain="example.io"
    )

    async with httpx.AsyncClient(app=app) as client:
        r = await client.get("http://testserver")

    # Entries of both sections are expected in order under one <urlset> root.
    content = dedent(
        """
        <?xml version="1.0" encoding="utf-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>http://example.io/</loc>
                <priority>0.5</priority>
            </url>
            <url>
                <loc>http://example.io/about</loc>
                <priority>0.5</priority>
            </url>
            <url>
                <loc>http://example.io/blog/articles/</loc>
                <priority>0.5</priority>
            </url>
        </urlset>
        """
    ).lstrip()

    assert r.status_code == 200
    assert r.text == content
    assert r.headers["content-type"] == "application/xml"
    assert r.headers["content-length"] == str(len(content))
256 |
257 |
@pytest.mark.asyncio
async def test_sitemap_scope() -> None:
    """
    Sitemaps can use `self.scope` to access the ASGI scope.
    """

    class Sitemap(asgi_sitemaps.Sitemap[str]):
        def items(self) -> List[str]:
            return ["home"]

        def location(self, name: str) -> str:
            request = Request(self.scope)
            return URL(request.url_for(name)).path

    sitemap = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io")

    async def home() -> Any:
        ...  # pragma: no cover

    routes = [Route("/", home, name="home"), Route("/sitemap.xml", sitemap)]
    app = Starlette(routes=routes)

    async with httpx.AsyncClient(app=app) as client:
        r = await client.get("http://testserver/sitemap.xml")

    # The "home" route name is expected to resolve to the "/" path.
    content = dedent(
        """
        <?xml version="1.0" encoding="utf-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>http://example.io/</loc>
                <priority>0.5</priority>
            </url>
        </urlset>
        """
    ).lstrip()

    assert r.text == content
296 |
--------------------------------------------------------------------------------
/tests/test_unit.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import asgi_sitemaps
4 | from asgi_sitemaps._generation import get_fields
5 |
6 |
def test_invalid_absolute_location() -> None:
    """
    Location cannot be a full URL with scheme or domain.
    """

    class Sitemap(asgi_sitemaps.Sitemap[str]):
        def location(self, path: str) -> str:
            # f-string, so the item's path is actually interpolated into the URL.
            return f"https://example.org{path}"

    sitemap = Sitemap()
    with pytest.raises(ValueError):
        get_fields(sitemap, "/", scope={"scheme": "http"}, domain="example.io")
19 |
--------------------------------------------------------------------------------