├── .editorconfig ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.rst ├── LICENSE ├── README.rst ├── django_sitemaps.py └── pyproject.toml /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | insert_final_newline = true 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | indent_style = space 10 | indent_size = 2 11 | 12 | [*.py] 13 | indent_size = 4 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py? 2 | *~ 3 | *.sw? 4 | \#*# 5 | /secrets.py 6 | .DS_Store 7 | ._* 8 | *.egg-info 9 | /MANIFEST 10 | /_build 11 | /build 12 | dist 13 | tests/test.zip 14 | /docs/_build 15 | /.eggs 16 | .coverage 17 | htmlcov 18 | venv 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ".yarn/|yarn.lock|\\.min\\.(css|js)$" 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.4.0 5 | hooks: 6 | - id: check-added-large-files 7 | - id: check-builtin-literals 8 | - id: check-executables-have-shebangs 9 | - id: check-merge-conflict 10 | - id: check-toml 11 | - id: check-yaml 12 | - id: detect-private-key 13 | - id: end-of-file-fixer 14 | - id: mixed-line-ending 15 | - id: trailing-whitespace 16 | - repo: https://github.com/adamchainz/django-upgrade 17 | rev: 1.14.0 18 | hooks: 19 | - id: django-upgrade 20 | args: [--target-version, "3.2"] 21 | - repo: https://github.com/MarcoGorelli/absolufy-imports 22 | rev: v0.3.1 23 | hooks: 24 | - id: absolufy-imports 25 | - repo: https://github.com/charliermarsh/ruff-pre-commit 26 | rev: "v0.0.284" 27 | hooks: 28 | - id: ruff 29 | - repo: https://github.com/psf/black 30 | rev: 23.7.0 31 | hooks: 
32 | - id: black 33 | - repo: https://github.com/pre-commit/mirrors-prettier 34 | rev: v3.0.1 35 | hooks: 36 | - id: prettier 37 | args: [--list-different, --no-semi] 38 | exclude: "^conf/|.*\\.html$" 39 | - repo: https://github.com/tox-dev/pyproject-fmt 40 | rev: 0.13.1 41 | hooks: 42 | - id: pyproject-fmt 43 | - repo: https://github.com/abravalheri/validate-pyproject 44 | rev: v0.13 45 | hooks: 46 | - id: validate-pyproject 47 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Change log 3 | ========== 4 | 5 | `Next version`_ 6 | ~~~~~~~~~~~~~~~ 7 | 8 | - Converted the package to hatchling and ruff. 9 | 10 | 11 | `2.0.1`_ (2022-01-25) 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | - Fixed cases where empty priority tags were added if priority was an 15 | empty string (which is always the default case with Django sitemaps). 16 | - Added pre-commit, switched to a declarative setup, etc. 17 | - Made ``add_django_sitemap`` always build URLs using ``build_absolute_uri`` 18 | too to avoid problems when using ``django.contrib.sites``. 19 | 20 | 21 | `1.1`_ (2018-04-11) 22 | ~~~~~~~~~~~~~~~~~~~ 23 | 24 | - Added the ``lxml`` dependency to ``install_requires``. 25 | - Added documentation. 26 | - Added the ``robots_txt`` view for easily adding a ``/robots.txt`` 27 | view returning sitemap URLs. 28 | 29 | 30 | `1.0`_ (2017-03-29) 31 | ~~~~~~~~~~~~~~~~~~~ 32 | 33 | - Initial release! 34 | 35 | .. _1.0: https://github.com/matthiask/django-sitemaps/commit/df0841349 36 | .. _1.1: https://github.com/matthiask/django-sitemaps/compare/1.0...1.1 37 | .. _2.0.1: https://github.com/matthiask/django-sitemaps/compare/1.1...2.0.1 38 | .. 
_Next version: https://github.com/matthiask/django-sitemaps/compare/2.0.1...main 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Feinheit AG and individual contributors. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of Feinheit AG nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | django-sitemaps 3 | =============== 4 | 5 | ``sitemap.xml`` generation using lxml_ with support for alternates_. It 6 | uses Python 3's keyword-only arguments for self-documenting code. 7 | 8 | 9 | Installation 10 | ============ 11 | 12 | Simply ``pip install django-sitemaps``. The package consists of a single 13 | python module, ``django_sitemaps``, containing the ``Sitemap`` class and the ``robots_txt`` helper; there's no 14 | additional configuration necessary. 15 | 16 | 17 | Usage 18 | ===== 19 | 20 | View:: 21 | 22 | from app.pages.sitemaps import PagesSitemap 23 | 24 | def sitemap(request): 25 | sitemap = Sitemap( 26 | # All URLs are passed through build_absolute_uri. 27 | build_absolute_uri=request.build_absolute_uri, 28 | ) 29 | 30 | # URLs can be added one-by-one. The only required argument 31 | # is the URL. All other arguments are keyword-only arguments. 32 | for p in Page.objects.active(): 33 | url = p.get_absolute_url() 34 | sitemap.add( 35 | url, 36 | changefreq='weekly', 37 | priority=0.5, 38 | lastmod=p.modification_date, 39 | alternates={ 40 | code: urljoin(domain, url) 41 | for code, domain in PAGE_DOMAINS[p.language].items() 42 | }, 43 | ) 44 | 45 | # Adding conventional Django sitemaps is supported. Neither the 46 | # request nor a site or protocol is needed: the locations are 47 | # passed through build_absolute_uri, like all other URLs. 48 | sitemap.add_django_sitemap(PagesSitemap) 49 | 50 | # You can also pass a sitemap instance instead of the class should 51 | # you wish to do so: 52 | sitemap.add_django_sitemap( 53 | PagesSitemap(), 54 | ) 55 | # Note! add_django_sitemap does not accept request, site or 56 | # protocol arguments. 57 | 58 | # You could get the serialized XML... 59 | # ... 
= sitemap.serialize([pretty_print=False]) 60 | # ... or use the ``response`` helper to return a 61 | # ready-made ``HttpResponse``: 62 | return sitemap.response( 63 | # pretty_print is False by default 64 | pretty_print=settings.DEBUG, 65 | ) 66 | 67 | URLconf:: 68 | 69 | from django_sitemaps import robots_txt 70 | from app.views import sitemap 71 | 72 | urlpatterns = [ 73 | url(r'^sitemap\.xml$', sitemap), 74 | url(r'^robots\.txt$', robots_txt(timeout=86400)), 75 | ... 76 | ] 77 | 78 | The ``robots_txt`` function returns a view which can be used to generate 79 | a ``robots.txt`` file containing sitemap URLs. The default ``robots.txt`` only 80 | contains:: 81 | 82 | User-agent: * 83 | Sitemap: <scheme>://<host>/sitemap.xml 84 | 85 | The list of sitemap URLs may be overridden by setting ``sitemaps``:: 86 | 87 | from django.urls import reverse_lazy 88 | 89 | urlpatterns = [ 90 | url(r'^robots\.txt$', robots_txt( 91 | timeout=86400, 92 | sitemaps=[ 93 | '/sitemap.xml', 94 | reverse_lazy('articles-sitemap'), 95 | ..., 96 | ], 97 | )), 98 | ] 99 | 100 | 101 | .. _alternates: https://support.google.com/webmasters/answer/2620865?hl=en 102 | .. 
"""``sitemap.xml`` generation using lxml, with support for alternates."""

from calendar import timegm
from datetime import date
from types import SimpleNamespace

from django.http import HttpResponse
from django.utils.http import http_date
from django.views.decorators.cache import cache_page
from lxml import etree
from lxml.builder import ElementMaker


__all__ = ("Sitemap", "robots_txt")


# Element factory for the sitemap namespace (the default namespace of the
# generated document).
S = ElementMaker(
    namespace="http://www.sitemaps.org/schemas/sitemap/0.9",
    nsmap={
        None: "http://www.sitemaps.org/schemas/sitemap/0.9",
        "xhtml": "http://www.w3.org/1999/xhtml",
    },
)
# Element factory for ``xhtml:link`` alternate entries.
X = ElementMaker(
    namespace="http://www.w3.org/1999/xhtml",
    nsmap={"xhtml": "http://www.w3.org/1999/xhtml"},
)


class Sitemap:
    """Collect ``<url>`` entries and serialize them as a ``<urlset>``."""

    def __init__(self, *, build_absolute_uri):
        """Create an empty sitemap.

        ``build_absolute_uri`` is a callable (for example
        ``request.build_absolute_uri``); every location and every alternate
        URL added to the sitemap is passed through it.
        """
        # The ``<url>`` elements added so far, in insertion order.
        self.urls = []
        self.build_absolute_uri = build_absolute_uri
        # Latest ``lastmod`` seen so far as a UNIX timestamp, or None.
        self.lastmod = None
        # Becomes False as soon as one URL is added without a ``lastmod``.
        self.all_urls_lastmod = True

    def add(
        self, loc, *, changefreq=None, lastmod=None, priority=None, alternates=None
    ):
        """Append one ``<url>`` entry.

        ``loc`` is the only required argument. ``lastmod`` may be a ``date``
        or ``datetime`` (emitted in ISO format and tracked for the
        ``Last-Modified`` header) or any other non-``None`` value, which is
        handed to the element builder unchanged and is not tracked.
        ``priority`` is skipped when it is ``None`` or an empty string.
        ``alternates`` maps ``hreflang`` codes to URLs.
        """
        children = [S.loc(self.build_absolute_uri(loc))]
        if changefreq is not None:
            children.append(S.changefreq(changefreq))
        if isinstance(lastmod, date):
            # ``datetime`` instances offer ``utctimetuple``; plain ``date``
            # instances only have ``timetuple``.
            new = timegm(
                lastmod.utctimetuple()
                if hasattr(lastmod, "utctimetuple")
                else lastmod.timetuple()
            )
            # Compare against None explicitly: a stored timestamp of 0 (the
            # epoch) is a valid value and must not be treated as "unset".
            self.lastmod = new if self.lastmod is None else max(self.lastmod, new)
            children.append(S.lastmod(lastmod.isoformat()))
        elif lastmod is not None:
            children.append(S.lastmod(lastmod))
        else:
            self.all_urls_lastmod = False
        # Only skip a missing priority (None, or the empty string Django's
        # sitemaps produce by default); 0 and 0.0 are valid priorities.
        if priority is not None and priority != "":
            children.append(S.priority(str(priority)))

        for code, url in (alternates or {}).items():
            children.append(
                X.link(
                    {
                        "rel": "alternate",
                        "hreflang": code,
                        "href": self.build_absolute_uri(url),
                    }
                )
            )

        self.urls.append(S.url(*children))

    def add_django_sitemap(self, sitemap):
        """Add all URLs of a conventional Django sitemap.

        ``sitemap`` may be a sitemap instance or a callable (such as the
        sitemap class) returning one. The sitemap is asked for its URLs with
        a placeholder site and protocol; the placeholder prefix is stripped
        again so that the final URL is built by ``build_absolute_uri``.
        """
        if callable(sitemap):
            sitemap = sitemap()

        for url in sitemap.get_urls(site=SimpleNamespace(domain="_"), protocol="_"):
            # Replace this with url["location"].removeprefix("_://_") when
            # supporting only Python 3.9 or better.
            loc = url["location"]
            if loc.startswith("_://_"):
                loc = loc[5:]
            self.add(
                loc,
                changefreq=url.get("changefreq"),
                lastmod=url.get("lastmod"),
                priority=url.get("priority"),
            )

    def serialize(self, *, pretty_print=False):
        """Return the sitemap as UTF-8 encoded XML including the declaration."""
        return etree.tostring(
            S.urlset(*self.urls),
            encoding="UTF-8",
            pretty_print=pretty_print,
            xml_declaration=True,
        )

    def response(self, *, pretty_print=False):
        """Return an ``HttpResponse`` containing the serialized sitemap.

        A ``Last-Modified`` header is only added when every added URL had a
        ``lastmod`` and at least one of them was a ``date``/``datetime``.
        """
        response = HttpResponse(
            self.serialize(pretty_print=pretty_print), content_type="application/xml"
        )
        # The header understood by crawlers is "X-Robots-Tag" (plural).
        response["X-Robots-Tag"] = "noindex, noodp, noarchive"
        if self.all_urls_lastmod and self.lastmod is not None:
            response["Last-Modified"] = http_date(self.lastmod)
        return response


def robots_txt(*, timeout=0, sitemaps=("/sitemap.xml",)):
    """Return a view serving a ``robots.txt`` which lists sitemap URLs.

    ``timeout`` is handed to ``cache_page``. Each entry of ``sitemaps`` is
    converted with ``str()`` and passed through
    ``request.build_absolute_uri``.
    """

    @cache_page(timeout)
    def view(request):
        lines = ["User-agent: *\n"]
        lines.extend(
            f"Sitemap: {request.build_absolute_uri(str(sitemap))}\n"
            for sitemap in sitemaps
        )
        return HttpResponse("".join(lines), content_type="text/plain")

    return view
name = "Matthias Kestenholz", email = "mk@feinheit.ch" }, 14 | ] 15 | requires-python = ">=3.8" 16 | classifiers = [ 17 | "Environment :: Web Environment", 18 | "Framework :: Django", 19 | "Intended Audience :: Developers", 20 | "License :: OSI Approved :: BSD License", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3 :: Only", 24 | "Programming Language :: Python :: 3.8", 25 | "Programming Language :: Python :: 3.9", 26 | "Programming Language :: Python :: 3.10", 27 | "Programming Language :: Python :: 3.11", 28 | "Topic :: Internet :: WWW/HTTP :: Dynamic Content", 29 | "Topic :: Software Development", 30 | "Topic :: Software Development :: Libraries :: Application Frameworks", 31 | ] 32 | dependencies = [ 33 | "lxml", 34 | ] 35 | [project.urls] 36 | Homepage = "https://github.com/matthiask/django-sitemaps/" 37 | 38 | [tool.hatch.build] 39 | include = [ 40 | "django_sitemaps.py" 41 | ] 42 | 43 | [tool.ruff] 44 | extend-select = [ 45 | # pyflakes, pycodestyle 46 | "F", "E", "W", 47 | # mccabe 48 | "C90", 49 | # isort 50 | "I", 51 | # pep8-naming 52 | "N", 53 | # pyupgrade 54 | "UP", 55 | # flake8-2020 56 | "YTT", 57 | # flake8-boolean-trap 58 | "FBT", 59 | # flake8-bugbear 60 | "B", 61 | # flake8-comprehensions 62 | "C4", 63 | # flake8-django 64 | "DJ", 65 | # flake8-implicit-string-concatenation 66 | "ISC", 67 | # flake8-pie 68 | "PIE", 69 | # flake8-simplify 70 | "SIM", 71 | # flake8-gettext 72 | "INT", 73 | # pygrep-hooks 74 | "PGH", 75 | # pylint 76 | "PLC", "PLE", "PLW", 77 | # unused noqa 78 | "RUF100", 79 | ] 80 | extend-ignore = [ 81 | # Allow zip() without strict= 82 | "B905", 83 | # No line length errors 84 | "E501", 85 | ] 86 | fix = true 87 | show-fixes = true 88 | target-version = "py38" 89 | 90 | [tool.ruff.isort] 91 | combine-as-imports = true 92 | lines-after-imports = 2 93 | 94 | [tool.ruff.mccabe] 95 | max-complexity = 15 96 | 97 | [tool.ruff.per-file-ignores] 98 | 
"*/migrat*/*" = [ 99 | # Allow using PascalCase model names in migrations 100 | "N806", 101 | # Ignore the fact that migration files are invalid module names 102 | "N999", 103 | ] 104 | --------------------------------------------------------------------------------