├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── ci └── azure-pipelines.yml ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── asgi_sitemaps │ ├── __init__.py │ ├── __version__.py │ ├── _app.py │ ├── _generation.py │ ├── _models.py │ ├── _types.py │ └── py.typed └── tests ├── __init__.py ├── test_integration.py └── test_unit.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Tooling. 2 | .coverage 3 | venv*/ 4 | 5 | # Caches. 6 | __pycache__/ 7 | *.pyc 8 | .mypy_cache/ 9 | .pytest_cache/ 10 | 11 | # Packaging. 12 | build/ 13 | dist/ 14 | *.egg-info/ 15 | 16 | # Private. 17 | .env 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 6 | 7 | ## 1.0 - 2022-02-13 8 | 9 | ### Added 10 | 11 | - _Now marked as Production/Stable software._ (Pull #14) 12 | - Add official support for Python 3.9 and Python 3.10. (Pull #13) 13 | 14 | ## 0.3.2 - 2020-07-07 15 | 16 | ### Fixed 17 | 18 | - Fix support for async items. (Pull #9) 19 | 20 | ## 0.3.1 - 2020-07-05 21 | 22 | ### Fixed 23 | 24 | - Fix `Scope` type hint: values are now `Any`. 25 | 26 | ## 0.3.0 - 2020-07-05 27 | 28 | This release changes the approach from "scrape the ASGI app to gather URLs" to a programmatic class-based API inspired by Django's sitemap framework. 29 | 30 | As such, the command line application does not exist anymore. Users are expected to define `Sitemap` classes, compose them into a `SitemapApp` endpoint, and add that to their ASGI app routing table. 31 | 32 | See the new `README.md` documentation for more information. 
33 | 34 | ### Changed 35 | 36 | - Switch to a class-based dynamic endpoint API. (Pull #4) 37 | 38 | ## 0.2.0 - 2020-06-01 39 | 40 | ### Changed 41 | 42 | - Project was renamed from `sitemaps` to `asgi-sitemaps` - sitemap generation for ASGI apps. (Pull #2) 43 | - Change options of CLI and programmatic API to fit new "ASGI-only" project scope. (Pull #2) 44 | - CLI now reads from stdin (for `--check` mode) and outputs sitemap to stdout. (Pull #2) 45 | 46 | ### Removed 47 | 48 | - Drop support for crawling arbitrary remote servers. (Pull #2) 49 | 50 | ### Fixed 51 | 52 | - Don't include non-200 or non-HTML URLs in sitemap. (Pull #2) 53 | 54 | ## 0.1.0 - 2020-05-31 55 | 56 | ### Added 57 | 58 | - Initial implementation: CLI and programmatic async API. 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Florimond Manca 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src 2 | include README.md 3 | include CHANGELOG.md 4 | include LICENSE 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | venv = venv 2 | bin = ${venv}/bin/ 3 | pysources = src tests/ 4 | 5 | build: 6 | ${bin}python setup.py sdist bdist_wheel 7 | ${bin}twine check dist/* 8 | rm -r build 9 | 10 | check: 11 | ${bin}black --check --diff --target-version=py37 ${pysources} 12 | ${bin}flake8 ${pysources} 13 | ${bin}mypy ${pysources} 14 | ${bin}isort --check --diff ${pysources} 15 | 16 | install: 17 | python3 -m venv ${venv} 18 | ${bin}pip install -U pip wheel 19 | ${bin}pip install -r requirements.txt 20 | 21 | format: 22 | ${bin}autoflake --in-place --recursive ${pysources} 23 | ${bin}isort ${pysources} 24 | ${bin}black --target-version=py37 ${pysources} 25 | 26 | publish: 27 | ${bin}twine upload dist/* 28 | 29 | test: 30 | ${bin}pytest 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # asgi-sitemaps 2 | 3 | [![Build Status](https://dev.azure.com/florimondmanca/public/_apis/build/status/florimondmanca.asgi-sitemaps?branchName=master)](https://dev.azure.com/florimondmanca/public/_build/latest?definitionId=11&branchName=master) 4 | 
[![Coverage](https://codecov.io/gh/florimondmanca/asgi-sitemaps/branch/master/graph/badge.svg)](https://codecov.io/gh/florimondmanca/asgi-sitemaps) 5 | ![Python versions](https://img.shields.io/pypi/pyversions/asgi-sitemaps.svg) 6 | [![Package version](https://badge.fury.io/py/asgi-sitemaps.svg)](https://pypi.org/project/asgi-sitemaps) 7 | 8 | [Sitemap](https://www.sitemaps.org) generation for ASGI applications. Inspired by [Django's sitemap framework](https://docs.djangoproject.com/en/3.0/ref/contrib/sitemaps/). 9 | 10 | **Contents** 11 | 12 | - [Features](#features) 13 | - [Installation](#installation) 14 | - [Quickstart](#quickstart) 15 | - [How-To](#how-to) 16 | - [Sitemap sections](#sitemap-sections) 17 | - [Dynamic generation from database queries](#dynamic-generation-from-database-queries) 18 | - [Advanced web framework integration](#advanced-web-framework-integration) 19 | - [API Reference](#api-reference) 20 | - [`Sitemap`](#class-sitemap) 21 | - [`SitemapApp`](#class-sitemapapp) 22 | 23 | ## Features 24 | 25 | - Build and compose sitemap sections into a single dynamic ASGI endpoint. 26 | - Supports drawing sitemap items from a variety of sources (static lists, (async) ORM queries, etc). 27 | - Compatible with any ASGI framework. 28 | - Fully type annotated. 29 | - 100% test coverage. 30 | 31 | ## Installation 32 | 33 | Install with pip: 34 | 35 | ```shell 36 | $ pip install 'asgi-sitemaps==1.*' 37 | ``` 38 | 39 | `asgi-sitemaps` requires Python 3.7+. 40 | 41 | ## Quickstart 42 | 43 | Let's build a static sitemap for a "Hello, world!" application. The sitemap will contain a single URL entry for the home `/` endpoint. 44 | 45 | Here is the project file structure: 46 | 47 | ```console 48 | . 
49 | └── server 50 | ├── __init__.py 51 | ├── app.py 52 | └── sitemap.py 53 | ``` 54 | 55 | First, declare a sitemap section by subclassing `Sitemap`, then wrap it in a `SitemapApp`: 56 | 57 | ```python 58 | # server/sitemap.py 59 | import asgi_sitemaps 60 | 61 | class Sitemap(asgi_sitemaps.Sitemap): 62 | def items(self): 63 | return ["/"] 64 | 65 | def location(self, item: str): 66 | return item 67 | 68 | def changefreq(self, item: str): 69 | return "monthly" 70 | 71 | sitemap = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io") 72 | ``` 73 | 74 | Now, register the `sitemap` endpoint as a route onto your ASGI app. For example, if using Starlette: 75 | 76 | ```python 77 | # server/app.py 78 | from starlette.applications import Starlette 79 | from starlette.responses import PlainTextResponse 80 | from starlette.routing import Route 81 | from .sitemap import sitemap 82 | 83 | async def home(request): 84 | return PlainTextResponse("Hello, world!") 85 | 86 | routes = [ 87 | Route("/", home), 88 | Route("/sitemap.xml", sitemap), 89 | ] 90 | 91 | app = Starlette(routes=routes) 92 | ``` 93 | 94 | Serve the app using `$ uvicorn server.app:app`, then request the sitemap: 95 | 96 | ```bash 97 | curl http://localhost:8000/sitemap.xml 98 | ``` 99 | 100 | ```xml 101 | <?xml version="1.0" encoding="utf-8"?> 102 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 103 | <url> 104 | <loc>http://example.io/</loc> 105 | <changefreq>monthly</changefreq> 106 | <priority>0.5</priority> 107 | </url> 108 | </urlset> 109 | ``` 110 | 111 | Tada! 112 | 113 | To learn more: 114 | 115 | - See [How-To](#how-to) for more advanced usage, including splitting the sitemap in multiple sections, and dynamically generating entries from database queries. 116 | - See the [`Sitemap` API reference](#class-sitemap) for all supported sitemap options. 117 | 118 | ## How-To 119 | 120 | ### Sitemap sections 121 | 122 | You can combine multiple sitemap classes into a single sitemap endpoint. This is useful to split the sitemap in multiple sections that may have different `items()` and/or sitemap attributes.
Such sections could be static pages, blog posts, recent articles, etc. 123 | 124 | To do so, declare multiple sitemap classes, then pass them as a list to `SitemapApp`: 125 | 126 | ```python 127 | # server/sitemap.py 128 | import asgi_sitemaps 129 | 130 | class StaticSitemap(asgi_sitemaps.Sitemap): 131 | ... 132 | 133 | class BlogSitemap(asgi_sitemaps.Sitemap): 134 | ... 135 | 136 | sitemap = asgi_sitemaps.SitemapApp([StaticSitemap(), BlogSitemap()], domain="example.io") 137 | ``` 138 | 139 | Entries from each sitemap will be concatenated when building the final `sitemap.xml`. 140 | 141 | ### Dynamic generation from database queries 142 | 143 | `Sitemap.items()` supports consuming any async iterable. This means you can easily integrate with an async database client or ORM so that `Sitemap.items()` fetches and returns relevant rows for generating your sitemap. 144 | 145 | Here's an example using [Databases](https://github.com/encode/databases), assuming you have a `Database` instance in `server/resources.py`: 146 | 147 | ```python 148 | # server/sitemap.py 149 | import asgi_sitemaps 150 | from .resources import database 151 | 152 | class Sitemap(asgi_sitemaps.Sitemap): 153 | async def items(self): 154 | query = "SELECT permalink, updated_at FROM articles;" 155 | return await database.fetch_all(query) 156 | 157 | def location(self, row: dict): 158 | return row["permalink"] 159 | ``` 160 | 161 | ### Advanced web framework integration 162 | 163 | While `asgi-sitemaps` is framework-agnostic, you can use the [`.scope` attribute](#scope) available on `Sitemap` instances to feed the ASGI scope into your framework-specific APIs for inspecting and manipulating request information. 164 | 165 | Here is an example with [Starlette](https://www.starlette.io) where we build a sitemap of static pages. To decouple from the raw URL paths, pages are referred to by view name.
We reverse-lookup their URLs by building a `Request` instance from the ASGI `.scope`, and using `.url_for()`: 166 | 167 | ```python 168 | # server/sitemap.py 169 | import asgi_sitemaps 170 | from starlette.datastructures import URL 171 | from starlette.requests import Request 172 | 173 | class StaticSitemap(asgi_sitemaps.Sitemap): 174 | def items(self): 175 | return ["home", "about", "blog:home"] 176 | 177 | def location(self, name: str): 178 | request = Request(scope=self.scope) 179 | url = request.url_for(name) 180 | return URL(url).path 181 | ``` 182 | 183 | The corresponding Starlette routing table could look something like this: 184 | 185 | ```python 186 | # server/routes.py 187 | from starlette.routing import Mount, Route 188 | from . import views 189 | from .sitemap import sitemap 190 | 191 | routes = [ 192 | Route("/", views.home, name="home"), 193 | Route("/about", views.about, name="about"), 194 | Route("/blog/", views.blog_home, name="blog:home"), 195 | Route("/sitemap.xml", sitemap), 196 | ] 197 | ``` 198 | 199 | ## API Reference 200 | 201 | ### _class_ `Sitemap` 202 | 203 | Represents a source of sitemap entries. 204 | 205 | You can specify the type `T` of sitemap items for extra type safety: 206 | 207 | ```python 208 | import asgi_sitemaps 209 | 210 | class MySitemap(asgi_sitemaps.Sitemap[str]): 211 | ... 212 | ``` 213 | 214 | #### _async_ `items` 215 | 216 | Signature: `async def () -> Union[Iterable[T], AsyncIterable[T]]` 217 | 218 | _(**Required**)_ Return an [iterable](https://docs.python.org/3/glossary.html#term-iterable) or an [asynchronous iterable](https://docs.python.org/3/glossary.html#term-asynchronous-iterable) of items of the same type. Each item will be passed as-is to `.location()`, `.lastmod()`, `.changefreq()`, and `.priority()`. 
219 | 220 | Examples: 221 | 222 | ```python 223 | # Simplest usage: return a list 224 | def items(self) -> List[str]: 225 | return ["/", "/contact"] 226 | 227 | # Async operations are also supported 228 | async def items(self) -> List[dict]: 229 | query = "SELECT permalink, updated_at FROM pages;" 230 | return await database.fetch_all(query) 231 | 232 | # Sync and async generators are also supported 233 | async def items(self) -> AsyncIterator[dict]: 234 | query = "SELECT permalink, updated_at FROM pages;" 235 | async for row in database.aiter_rows(query): 236 | yield row 237 | ``` 238 | 239 | #### `location` 240 | 241 | Signature: `def (item: T) -> str` 242 | 243 | _(**Required**)_ Return the absolute path of a sitemap item. 244 | 245 | "Absolute path" means a URL path without a protocol or domain. For example: `/blog/my-article`. (So `https://mydomain.com/blog/my-article` is not a valid location, nor is `mydomain.com/blog/my-article`.) 246 | 247 | #### `lastmod` 248 | 249 | Signature: `def (item: T) -> Optional[datetime.datetime]` 250 | 251 | _(Optional)_ Return the [date of last modification](https://www.sitemaps.org/protocol.html#lastmoddef) of a sitemap item as a [`datetime`](https://docs.python.org/3/library/datetime.html#datetime.datetime) object, or `None` (the default) for no `lastmod` field. 252 | 253 | #### `changefreq` 254 | 255 | Signature: `def (item: T) -> Optional[str]` 256 | 257 | _(Optional)_ Return the [change frequency](https://www.sitemaps.org/protocol.html#changefreqdef) of a sitemap item. 258 | 259 | Possible values are: 260 | 261 | - `None` - No `changefreq` field (the default). 262 | - `"always"` 263 | - `"hourly"` 264 | - `"daily"` 265 | - `"weekly"` 266 | - `"monthly"` 267 | - `"yearly"` 268 | - `"never"` 269 | 270 | #### `priority` 271 | 272 | Signature: `def (item: T) -> float` 273 | 274 | _(Optional)_ Return the [priority](https://www.sitemaps.org/protocol.html#prioritydef) of a sitemap item. Must be between 0 and 1.
Defaults to `0.5`. 275 | 276 | #### `protocol` 277 | 278 | Type: `str` 279 | 280 | _(Optional)_ This attribute defines the protocol used to build URLs of the sitemap. 281 | 282 | Possible values are: 283 | 284 | - `"auto"` - The protocol with which the sitemap was requested (the default). 285 | - `"http"` 286 | - `"https"` 287 | 288 | #### `scope` 289 | 290 | This property returns the [ASGI scope](https://asgi.readthedocs.io/en/latest/specs/www.html#connection-scope) of the current HTTP request. 291 | 292 | ### _class_ `SitemapApp` 293 | 294 | An ASGI application that responds to HTTP requests with the `sitemap.xml` contents of the sitemap. 295 | 296 | Parameters: 297 | 298 | - _(**Required**)_ `sitemaps` - A `Sitemap` object or a list of `Sitemap` objects, used to generate sitemap entries. 299 | - _(**Required**)_ `domain` - The domain to use when generating sitemap URLs. 300 | 301 | Examples: 302 | 303 | ```python 304 | sitemap = SitemapApp(Sitemap(), domain="mydomain.com") 305 | sitemap = SitemapApp([StaticSitemap(), BlogSitemap()], domain="mydomain.com") 306 | ``` 307 | 308 | ## License 309 | 310 | MIT 311 | -------------------------------------------------------------------------------- /ci/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | resources: 2 | repositories: 3 | - repository: templates 4 | type: github 5 | endpoint: github 6 | name: florimondmanca/azure-pipelines-templates 7 | ref: refs/tags/5.0 8 | 9 | trigger: 10 | - master 11 | - refs/tags/* 12 | 13 | pr: 14 | - master 15 | 16 | variables: 17 | - name: CI 18 | value: "true" 19 | - name: PIP_CACHE_DIR 20 | value: $(Pipeline.Workspace)/.cache/pip 21 | - group: pypi-credentials 22 | 23 | stages: 24 | - stage: test 25 | jobs: 26 | - template: job--python-check.yml@templates 27 | parameters: 28 | pythonVersion: "3.10" 29 | 30 | - template: job--python-test.yml@templates 31 | parameters: 32 | jobs: 33 | py37: 34 | py310: 35 | coverage: true 36 | 37 
| - stage: publish 38 | condition: startsWith(variables['Build.SourceBranch'], 'refs/tags/') 39 | jobs: 40 | - template: job--python-publish.yml@templates 41 | parameters: 42 | pythonVersion: "3.10" 43 | token: $(pypiToken) 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | 3 | # Build and publishing. 4 | twine 5 | wheel 6 | 7 | # Tooling and tests. 8 | autoflake 9 | black==22.1.0 10 | flake8==4.* 11 | httpx==0.22.* 12 | isort==5.* 13 | mypy 14 | pytest==7.* 15 | pytest-asyncio==0.18.* 16 | pytest-cov 17 | seed-isort-config 18 | starlette==0.18.* 19 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = W503, E203, B305 3 | max-line-length = 88 4 | 5 | [mypy] 6 | disallow_untyped_defs = True 7 | ignore_missing_imports = True 8 | 9 | [tool:isort] 10 | profile = black 11 | known_first_party = asgi_sitemaps,tests 12 | known_third_party = httpx,pytest,setuptools,starlette 13 | 14 | [tool:pytest] 15 | addopts = 16 | -rxXs 17 | --cov=src 18 | --cov=tests 19 | --cov-report=term-missing 20 | --cov-fail-under=100 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def get_version(package: str) -> str: 8 | version = (Path("src") / package / "__version__.py").read_text() 9 | match = re.search("__version__ = ['\"]([^'\"]+)['\"]", version) 10 | assert match is not None 11 | return match.group(1) 12 | 13 | 14 | def get_long_description() -> str: 15 | with open("README.md", encoding="utf8") as readme: 16 | with open("CHANGELOG.md", encoding="utf8") as changelog: 
17 | return readme.read() + "\n\n" + changelog.read() 18 | 19 | 20 | setup( 21 | name="asgi-sitemaps", 22 | version=get_version("asgi_sitemaps"), 23 | description="Sitemap generation for ASGI applications.", 24 | long_description=get_long_description(), 25 | long_description_content_type="text/markdown", 26 | url="http://github.com/florimondmanca/asgi-sitemaps", 27 | author="Florimond Manca", 28 | author_email="florimond.manca@protonmail.com", 29 | packages=find_packages("src"), 30 | package_dir={"": "src"}, 31 | include_package_data=True, 32 | zip_safe=False, 33 | install_requires=[], 34 | python_requires=">=3.7", 35 | license="MIT", 36 | classifiers=[ 37 | "Development Status :: 5 - Production/Stable", 38 | "Intended Audience :: Developers", 39 | "Operating System :: OS Independent", 40 | "Framework :: AsyncIO", 41 | "Programming Language :: Python :: 3", 42 | "Programming Language :: Python :: 3 :: Only", 43 | "Programming Language :: Python :: 3.7", 44 | "Programming Language :: Python :: 3.8", 45 | "Programming Language :: Python :: 3.9", 46 | "Programming Language :: Python :: 3.10", 47 | ], 48 | ) 49 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/__init__.py: -------------------------------------------------------------------------------- 1 | from .__version__ import __version__ 2 | from ._app import SitemapApp 3 | from ._models import Sitemap 4 | 5 | __all__ = [ 6 | "__version__", 7 | "Sitemap", 8 | "SitemapApp", 9 | ] 10 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" 2 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/_app.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Sequence, Union 2 | 3 | from 
._generation import generate_sitemap 4 | from ._models import Sitemap 5 | from ._types import Scope 6 | 7 | 8 | class SitemapApp: 9 | def __init__( 10 | self, sitemaps: Union[Sitemap, Sequence[Sitemap]], *, domain: str 11 | ) -> None: 12 | self._sitemaps = [sitemaps] if isinstance(sitemaps, Sitemap) else sitemaps 13 | self._domain = domain 14 | 15 | async def __call__(self, scope: Scope, receive: Callable, send: Callable) -> None: 16 | assert scope["type"] == "http" 17 | 18 | content = await generate_sitemap( 19 | self._sitemaps, scope=scope, domain=self._domain 20 | ) 21 | 22 | headers = [ 23 | [b"content-type", b"application/xml"], 24 | [b"content-length", b"%d" % len(content)], 25 | ] 26 | 27 | message = await receive() 28 | assert message["type"] == "http.request" 29 | await send({"type": "http.response.start", "status": 200, "headers": headers}) 30 | await send({"type": "http.response.body", "body": content}) 31 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/_generation.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import ( 3 | AsyncIterable, 4 | AsyncIterator, 5 | Awaitable, 6 | Dict, 7 | Iterable, 8 | Sequence, 9 | cast, 10 | ) 11 | from urllib.parse import urljoin, urlsplit 12 | 13 | from ._models import SCOPE_CTX_VAR, Sitemap 14 | from ._types import ItemsTypes, Scope, T 15 | 16 | 17 | async def generate_sitemap( 18 | sitemaps: Sequence[Sitemap], *, scope: Scope, domain: str 19 | ) -> bytes: 20 | SCOPE_CTX_VAR.set(scope) 21 | 22 | async def _lines() -> AsyncIterator[bytes]: 23 | yield b'' 24 | yield b'' 25 | 26 | for sitemap in sitemaps: 27 | async for item in _ensure_async_iterator(sitemap.items()): 28 | yield 4 * b" " + b"" 29 | 30 | fields = get_fields(sitemap, item, scope=scope, domain=domain) 31 | for name, value in fields.items(): 32 | yield 8 * b" " + f"<{name}>{value}".encode("utf-8") 33 | 34 | yield 4 * b" " + b"" 35 | 
36 | yield b"" 37 | yield b"" 38 | 39 | return b"\n".join([line async for line in _lines()]) 40 | 41 | 42 | async def _ensure_async_iterator(items: ItemsTypes[T]) -> AsyncIterator[T]: 43 | if hasattr(items, "__aiter__"): 44 | items = cast(AsyncIterable[T], items) 45 | async for item in items: 46 | yield item 47 | elif inspect.isawaitable(items): 48 | items = cast(Awaitable[Iterable[T]], items) 49 | for item in await items: 50 | yield item 51 | else: 52 | items = cast(Iterable[T], items) 53 | for item in items: 54 | yield item 55 | 56 | 57 | def get_fields( 58 | sitemap: Sitemap[T], item: T, *, scope: Scope, domain: str 59 | ) -> Dict[str, str]: 60 | if sitemap.protocol == "auto": 61 | protocol = scope["scheme"] 62 | else: 63 | protocol = sitemap.protocol 64 | 65 | location = sitemap.location(item) 66 | lastmod = sitemap.lastmod(item) 67 | changefreq = sitemap.changefreq(item) 68 | priority = sitemap.priority(item) 69 | 70 | r = urlsplit(location) 71 | if r.scheme or r.netloc: 72 | raise ValueError(f"Location contains scheme or domain: {location}") 73 | 74 | fields = {} 75 | fields["loc"] = urljoin(f"{protocol}://{domain}", location) 76 | if lastmod is not None: 77 | fields["lastmod"] = lastmod.strftime("%Y-%m-%d") 78 | if changefreq is not None: 79 | fields["changefreq"] = changefreq 80 | fields["priority"] = str(priority) 81 | 82 | return fields 83 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/_models.py: -------------------------------------------------------------------------------- 1 | import contextvars 2 | import datetime as dt 3 | from typing import Generic, Optional 4 | 5 | from ._types import ItemsTypes, Scope, T 6 | 7 | SCOPE_CTX_VAR = contextvars.ContextVar[Scope]("asgi_sitemaps.scope") 8 | 9 | 10 | class Sitemap(Generic[T]): 11 | protocol = "auto" 12 | 13 | def __init__(self) -> None: 14 | assert self.protocol in ("http", "https", "auto") 15 | 16 | def items(self) -> ItemsTypes: 17 | raise 
NotImplementedError # pragma: no cover 18 | 19 | def location(self, item: T) -> str: 20 | raise NotImplementedError # pragma: no cover 21 | 22 | def lastmod(self, item: T) -> Optional[dt.datetime]: 23 | return None 24 | 25 | def changefreq(self, item: T) -> Optional[str]: 26 | return None 27 | 28 | def priority(self, item: T) -> float: 29 | return 0.5 30 | 31 | @property 32 | def scope(self) -> Scope: 33 | try: 34 | return SCOPE_CTX_VAR.get() 35 | except LookupError: # pragma: no cover 36 | raise RuntimeError("scope accessed outside of an ASGI request") 37 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/_types.py: -------------------------------------------------------------------------------- 1 | from typing import Any, AsyncIterable, Awaitable, Dict, Iterable, TypeVar, Union 2 | 3 | T = TypeVar("T") 4 | ItemsTypes = Union[Iterable[T], Awaitable[Iterable[T]], AsyncIterable[T]] 5 | Scope = Dict[str, Any] 6 | -------------------------------------------------------------------------------- /src/asgi_sitemaps/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/florimondmanca/asgi-sitemaps/009b0f04ef4bca29bcfb8304411b27b9b2bda64e/src/asgi_sitemaps/py.typed -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/florimondmanca/asgi-sitemaps/009b0f04ef4bca29bcfb8304411b27b9b2bda64e/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | from textwrap import dedent 3 | from typing import Any, AsyncIterator, List, Optional 4 | 5 | import httpx 6 | import pytest 7 | from starlette.applications import Starlette 8 | 
from starlette.datastructures import URL 9 | from starlette.requests import Request 10 | from starlette.routing import Route 11 | 12 | import asgi_sitemaps 13 | 14 | 15 | @pytest.mark.asyncio 16 | async def test_sitemap() -> None: 17 | """ 18 | A basic example returns expected sitemap XML content. 19 | """ 20 | 21 | class Sitemap(asgi_sitemaps.Sitemap[str]): 22 | def items(self) -> List[str]: 23 | return ["/", "/about"] 24 | 25 | def location(self, item: str) -> str: 26 | return item 27 | 28 | app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io") 29 | 30 | async with httpx.AsyncClient(app=app) as client: 31 | r = await client.get("http://testserver") 32 | 33 | content = dedent( 34 | """ 35 | 36 | 37 | 38 | http://example.io/ 39 | 0.5 40 | 41 | 42 | http://example.io/about 43 | 0.5 44 | 45 | 46 | """ 47 | ).lstrip() 48 | 49 | assert r.status_code == 200 50 | assert r.text == content 51 | assert r.headers["content-type"] == "application/xml" 52 | assert r.headers["content-length"] == str(len(content)) 53 | 54 | 55 | @pytest.mark.asyncio 56 | async def test_sitemap_fields() -> None: 57 | """ 58 | Custom sitemap fields behave as expected. 
59 | """ 60 | 61 | class Sitemap(asgi_sitemaps.Sitemap[int]): 62 | protocol = "https" 63 | 64 | def items(self) -> List[int]: 65 | return list(range(3)) 66 | 67 | def location(self, k: int) -> str: 68 | return f"/page{k + 1}" 69 | 70 | def lastmod(self, k: int) -> Optional[dt.datetime]: 71 | if k % 3 == 0: 72 | return dt.datetime(2020, 1, 1) 73 | elif k % 3 == 1: 74 | return None 75 | else: 76 | return dt.datetime(2018, 3, 14) 77 | 78 | def changefreq(self, k: int) -> Optional[str]: 79 | if k % 3 == 0: 80 | return "daily" 81 | elif k % 3 == 1: 82 | return "monthly" 83 | else: 84 | return None 85 | 86 | def priority(self, k: int) -> float: 87 | return 0.7 88 | 89 | app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io") 90 | 91 | async with httpx.AsyncClient(app=app) as client: 92 | r = await client.get("http://testserver") 93 | 94 | content = dedent( 95 | """ 96 | 97 | 98 | 99 | https://example.io/page1 100 | 2020-01-01 101 | daily 102 | 0.7 103 | 104 | 105 | https://example.io/page2 106 | monthly 107 | 0.7 108 | 109 | 110 | https://example.io/page3 111 | 2018-03-14 112 | 0.7 113 | 114 | 115 | """ 116 | ).lstrip() 117 | 118 | assert r.status_code == 200 119 | assert r.text == content 120 | assert r.headers["content-type"] == "application/xml" 121 | assert r.headers["content-length"] == str(len(content)) 122 | 123 | 124 | @pytest.mark.asyncio 125 | async def test_sitemap_async_items() -> None: 126 | """ 127 | `.items()` supports returning an awaitable of items. 
128 | """ 129 | 130 | class Sitemap(asgi_sitemaps.Sitemap[str]): 131 | async def items(self) -> List[str]: 132 | return ["/", "/about"] 133 | 134 | def location(self, item: str) -> str: 135 | return item 136 | 137 | app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io") 138 | 139 | async with httpx.AsyncClient(app=app) as client: 140 | r = await client.get("http://testserver") 141 | 142 | content = dedent( 143 | """ 144 | 145 | 146 | 147 | http://example.io/ 148 | 0.5 149 | 150 | 151 | http://example.io/about 152 | 0.5 153 | 154 | 155 | """ 156 | ).lstrip() 157 | 158 | assert r.status_code == 200 159 | assert r.text == content 160 | assert r.headers["content-type"] == "application/xml" 161 | assert r.headers["content-length"] == str(len(content)) 162 | 163 | 164 | @pytest.mark.asyncio 165 | async def test_sitemap_async_iterable_items() -> None: 166 | """ 167 | `.items()` supports returning async iterables. 168 | """ 169 | 170 | class Sitemap(asgi_sitemaps.Sitemap[str]): 171 | async def items(self) -> AsyncIterator[str]: 172 | for item in ["/", "/about"]: 173 | yield item 174 | 175 | def location(self, item: str) -> str: 176 | return item 177 | 178 | app = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io") 179 | 180 | async with httpx.AsyncClient(app=app) as client: 181 | r = await client.get("http://testserver") 182 | 183 | content = dedent( 184 | """ 185 | 186 | 187 | 188 | http://example.io/ 189 | 0.5 190 | 191 | 192 | http://example.io/about 193 | 0.5 194 | 195 | 196 | """ 197 | ).lstrip() 198 | 199 | assert r.status_code == 200 200 | assert r.text == content 201 | assert r.headers["content-type"] == "application/xml" 202 | assert r.headers["content-length"] == str(len(content)) 203 | 204 | 205 | @pytest.mark.asyncio 206 | async def test_sitemap_sections() -> None: 207 | """ 208 | Multiple sitemap sections can be provided. 
209 | """ 210 | 211 | class StaticSitemap(asgi_sitemaps.Sitemap[str]): 212 | def items(self) -> List[str]: 213 | return ["/", "/about"] 214 | 215 | def location(self, item: str) -> str: 216 | return item 217 | 218 | class BlogSitemap(asgi_sitemaps.Sitemap[str]): 219 | def items(self) -> List[str]: 220 | return ["/blog/articles/"] 221 | 222 | def location(self, item: str) -> str: 223 | return item 224 | 225 | app = asgi_sitemaps.SitemapApp( 226 | [StaticSitemap(), BlogSitemap()], domain="example.io" 227 | ) 228 | 229 | async with httpx.AsyncClient(app=app) as client: 230 | r = await client.get("http://testserver") 231 | 232 | content = dedent( 233 | """ 234 | 235 | 236 | 237 | http://example.io/ 238 | 0.5 239 | 240 | 241 | http://example.io/about 242 | 0.5 243 | 244 | 245 | http://example.io/blog/articles/ 246 | 0.5 247 | 248 | 249 | """ 250 | ).lstrip() 251 | 252 | assert r.status_code == 200 253 | assert r.text == content 254 | assert r.headers["content-type"] == "application/xml" 255 | assert r.headers["content-length"] == str(len(content)) 256 | 257 | 258 | @pytest.mark.asyncio 259 | async def test_sitemap_scope() -> None: 260 | """ 261 | Sitemaps can use `self.scope` to access the ASGI scope. 262 | """ 263 | 264 | class Sitemap(asgi_sitemaps.Sitemap[str]): 265 | def items(self) -> List[str]: 266 | return ["home"] 267 | 268 | def location(self, name: str) -> str: 269 | request = Request(self.scope) 270 | return URL(request.url_for(name)).path 271 | 272 | sitemap = asgi_sitemaps.SitemapApp(Sitemap(), domain="example.io") 273 | 274 | async def home() -> Any: 275 | ... 
# pragma: no cover 276 | 277 | routes = [Route("/", home, name="home"), Route("/sitemap.xml", sitemap)] 278 | app = Starlette(routes=routes) 279 | 280 | async with httpx.AsyncClient(app=app) as client: 281 | r = await client.get("http://testserver/sitemap.xml") 282 | 283 | content = dedent( 284 | """ 285 | 286 | 287 | 288 | http://example.io/ 289 | 0.5 290 | 291 | 292 | """ 293 | ).lstrip() 294 | 295 | assert r.text == content 296 | -------------------------------------------------------------------------------- /tests/test_unit.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import asgi_sitemaps 4 | from asgi_sitemaps._generation import get_fields 5 | 6 | 7 | def test_invalid_absolute_location() -> None: 8 | """ 9 | Location cannot be a full URL with scheme or domain. 10 | """ 11 | 12 | class Sitemap(asgi_sitemaps.Sitemap[str]): 13 | def location(self, path: str) -> str: 14 | return "https://example.org{path}" 15 | 16 | sitemap = Sitemap() 17 | with pytest.raises(ValueError): 18 | get_fields(sitemap, "/", scope={"scheme": "http"}, domain="example.io") 19 | --------------------------------------------------------------------------------