├── .npmignore
├── .travis.yml
├── test
├── large-sitemap.xml
└── basic.js
├── CHANGELOG.md
├── LICENSE
├── package.json
├── index.js
└── README.md
/.npmignore:
--------------------------------------------------------------------------------
1 | .travis.yml
2 | test/
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | - lts/*
4 |
--------------------------------------------------------------------------------
/test/large-sitemap.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | https://bitmidi.com/sitemap-0.xml
5 | 2018-07-15
6 |
7 |
8 | https://bitmidi.com/sitemap-1.xml
9 | 2018-07-15
10 |
11 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4 |
5 | ## [3.1.0](https://github.com/feross/express-sitemap-xml/compare/v3.0.0...v3.1.0) (2024-04-19)
6 |
7 |
8 | ### Features
9 |
10 | * add image support ([5245d85](https://github.com/feross/express-sitemap-xml/commit/5245d851b08e929d27e1bc3cf9dbe2a09100072e))
11 |
12 |
13 | ### Bug Fixes
14 |
15 | * base url can be nested ([28e62df](https://github.com/feross/express-sitemap-xml/commit/28e62dfebac163bf1938ccfd35bdab1c87f0a8aa))
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) Feross Aboukhadijeh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "express-sitemap-xml",
3 | "description": "Express middleware to serve `sitemap.xml` from a list of URLs",
4 | "version": "3.1.0",
5 | "author": {
6 | "name": "Feross Aboukhadijeh",
7 | "email": "feross@feross.org",
8 | "url": "https://feross.org"
9 | },
10 | "bugs": {
11 | "url": "https://github.com/feross/express-sitemap-xml/issues"
12 | },
13 | "dependencies": {
14 | "p-memoize": "^4.0.1",
15 | "xmlbuilder": "^15.1.1"
16 | },
17 | "devDependencies": {
18 | "common-tags": "^1.8.0",
19 | "standard": "*",
20 | "tape": "^5.0.1"
21 | },
22 | "homepage": "https://github.com/feross/express-sitemap-xml",
23 | "keywords": [
24 | "express",
25 | "google",
26 | "serve sitemap",
27 | "serve sitemap.xml",
28 | "site map",
29 | "site map xml",
30 | "sitemap",
31 | "sitemap generator",
32 | "sitemap xml",
33 | "sitemap.xml",
34 | "sitemaps",
35 | "xml"
36 | ],
37 | "license": "MIT",
38 | "main": "index.js",
39 | "repository": {
40 | "type": "git",
41 | "url": "git://github.com/feross/express-sitemap-xml.git"
42 | },
43 | "scripts": {
44 | "test": "standard && tape test/**/*.js"
45 | },
46 | "funding": [
47 | {
48 | "type": "github",
49 | "url": "https://github.com/sponsors/feross"
50 | },
51 | {
52 | "type": "patreon",
53 | "url": "https://www.patreon.com/feross"
54 | },
55 | {
56 | "type": "consulting",
57 | "url": "https://feross.org/support"
58 | }
59 | ]
60 | }
61 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | /*! express-sitemap-xml. MIT License. Feross Aboukhadijeh */
2 | module.exports = expressSitemapXml
3 | module.exports.buildSitemaps = buildSitemaps
4 |
5 | const builder = require('xmlbuilder')
6 | const pMemoize = require('p-memoize')
7 | const { URL } = require('url') // TODO: Remove once Node 8 support is dropped
8 |
9 | const MAX_SITEMAP_LENGTH = 50 * 1000 // Max URLs in a sitemap (defined by spec)
10 | const SITEMAP_URL_RE = /\/sitemap(-\d+)?\.xml/ // Sitemap url pattern
11 | const SITEMAP_MAX_AGE = 24 * 60 * 60 * 1000 // Cache sitemaps for 24 hours
12 |
13 | const TRAILING_SLASH_RE = /\/+$/
14 |
15 | function removeTrailingSlash (str) {
16 | return str.replace(TRAILING_SLASH_RE, '')
17 | }
18 |
19 | function expressSitemapXml (
20 | getUrls,
21 | base,
22 | { size = MAX_SITEMAP_LENGTH, maxAge = SITEMAP_MAX_AGE } = {}
23 | ) {
24 | if (typeof getUrls !== 'function') {
25 | throw new Error('Argument `getUrls` must be a function')
26 | }
27 | if (typeof base !== 'string') {
28 | throw new Error('Argument `base` must be a string')
29 | }
30 |
31 | async function loadSitemaps () {
32 | const urls = await getUrls()
33 | if (!Array.isArray(urls)) {
34 | throw new Error('async function `getUrls` must resolve to an Array')
35 | }
36 | return buildSitemaps(urls, base, size)
37 | }
38 |
39 | const memoizedLoad = pMemoize(loadSitemaps, { maxAge })
40 |
41 | return async (req, res, next) => {
42 | const isSitemapUrl = SITEMAP_URL_RE.test(req.url)
43 | if (isSitemapUrl) {
44 | const sitemaps = await memoizedLoad()
45 | if (sitemaps[req.url]) {
46 | res.setHeader('Content-Type', 'application/xml')
47 | return res.status(200).send(sitemaps[req.url])
48 | }
49 | }
50 | next()
51 | }
52 | }
53 |
54 | async function buildSitemaps (urls, base, size = MAX_SITEMAP_LENGTH) {
55 | const sitemaps = Object.create(null)
56 |
57 | if (urls.length <= size) {
58 | // If there is only one sitemap (i.e. there are less than 50,000 URLs)
59 | // then serve it directly at /sitemap.xml
60 | sitemaps['/sitemap.xml'] = buildSitemap(urls, base)
61 | } else {
62 | // Otherwise, serve a sitemap index at /sitemap.xml and sitemaps at
63 | // /sitemap-0.xml, /sitemap-1.xml, etc.
64 | for (let i = 0; i * size < urls.length; i++) {
65 | const start = i * size
66 | const selectedUrls = urls.slice(start, start + size)
67 | sitemaps[`/sitemap-${i}.xml`] = buildSitemap(selectedUrls, base)
68 | }
69 | sitemaps['/sitemap.xml'] = buildSitemapIndex(sitemaps, base)
70 | }
71 |
72 | return sitemaps
73 | }
74 |
75 | function buildSitemapIndex (sitemaps, base) {
76 | const sitemapObjs = Object.keys(sitemaps).map(sitemapUrl => {
77 | return {
78 | loc: toAbsolute(sitemapUrl, base),
79 | lastmod: getTodayStr()
80 | }
81 | })
82 |
83 | const sitemapIndexObj = {
84 | sitemapindex: {
85 | '@xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
86 | sitemap: sitemapObjs
87 | }
88 | }
89 |
90 | return buildXml(sitemapIndexObj)
91 | }
92 |
93 | function buildSitemap (urls, base) {
94 | const urlObjs = urls.map(url => {
95 | if (typeof url === 'string') {
96 | return {
97 | loc: toAbsolute(url, base)
98 | }
99 | }
100 |
101 | if (typeof url.url !== 'string') {
102 | throw new Error(
103 | `Invalid sitemap url object, missing 'url' property: ${JSON.stringify(
104 | url
105 | )}`
106 | )
107 | }
108 |
109 | const urlObj = {
110 | loc: toAbsolute(url.url, base)
111 | }
112 | if (url.lastMod === true) {
113 | urlObj.lastmod = getTodayStr()
114 | } else if (typeof url.lastMod === 'string' || url.lastMod instanceof Date) {
115 | urlObj.lastmod = dateToString(url.lastMod)
116 | }
117 |
118 | if (typeof url.changeFreq === 'string') {
119 | urlObj.changefreq = url.changeFreq
120 | }
121 |
122 | if (typeof url.image === 'string') {
123 | urlObj['image:image'] = {
124 | 'image:loc': toAbsolute(url.image, base)
125 | }
126 | } else if (Array.isArray(url.image)) {
127 | urlObj['image:image'] = url.image.map(image => {
128 | return {
129 | 'image:loc': toAbsolute(image, base)
130 | }
131 | })
132 | }
133 |
134 | return urlObj
135 | })
136 |
137 | const sitemapObj = {
138 | urlset: {
139 | '@xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
140 | url: urlObjs
141 | }
142 | }
143 |
144 | return buildXml(sitemapObj)
145 | }
146 |
147 | function buildXml (obj) {
148 | const opts = {
149 | encoding: 'utf-8'
150 | }
151 | const xml = builder.create(obj, opts)
152 | return xml.end({ pretty: true, allowEmpty: false })
153 | }
154 |
155 | function getTodayStr () {
156 | return dateToString(new Date())
157 | }
158 |
159 | function dateToString (date) {
160 | if (typeof date === 'string') return date
161 | return date.toISOString().split('T')[0]
162 | }
163 |
164 | function toAbsolute (url, base) {
165 | if (!url.startsWith('/')) return url
166 | const { origin, pathname } = new URL(base)
167 | const relative = pathname === '/' ? url : removeTrailingSlash(pathname) + url
168 | return new URL(relative, origin).href
169 | }
170 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # express-sitemap-xml [![travis][travis-image]][travis-url] [![npm][npm-image]][npm-url] [![downloads][downloads-image]][downloads-url] [![javascript style guide][standard-image]][standard-url]
2 |
3 | [travis-image]: https://img.shields.io/travis/feross/express-sitemap-xml/master.svg
4 | [travis-url]: https://travis-ci.org/feross/express-sitemap-xml
5 | [npm-image]: https://img.shields.io/npm/v/express-sitemap-xml.svg
6 | [npm-url]: https://npmjs.org/package/express-sitemap-xml
7 | [downloads-image]: https://img.shields.io/npm/dm/express-sitemap-xml.svg
8 | [downloads-url]: https://npmjs.org/package/express-sitemap-xml
9 | [standard-image]: https://img.shields.io/badge/code_style-standard-brightgreen.svg
10 | [standard-url]: https://standardjs.com
11 |
12 | ### Express middleware to serve [`sitemap.xml`](https://en.wikipedia.org/wiki/Sitemaps) from a list of URLs
13 |
14 | Create an Express middleware that serves `sitemap.xml` from a list of URLs.
15 |
16 | This package automatically handles sitemaps with more than 50,000 URLs. In these
17 | cases, multiple sitemap files will be generated along with a "sitemap index" to
18 | comply with the [sitemap spec](https://www.sitemaps.org/protocol.html) and
19 | requirements from search engines like Google.
20 |
21 | If only one sitemap file is needed (i.e. there are less than 50,000 URLs) then
22 | it is served directly at `/sitemap.xml`. Otherwise, a sitemap index is served at
23 | `/sitemap.xml` and sitemaps at `/sitemap-0.xml`, `/sitemap-1.xml`, etc.
24 |
25 | ## Install
26 |
27 | ```
28 | npm install express-sitemap-xml
29 | ```
30 |
31 | ## Demo
32 |
33 | You can see this package in action on [BitMidi](https://bitmidi.com), a site for
34 | listening to your favorite MIDI files.
35 |
36 | ## Usage (with Express)
37 |
38 | The easiest way to use this package is with the Express middleware.
39 |
40 | ```js
41 | const express = require('express')
42 | const expressSitemapXml = require('express-sitemap-xml')
43 |
44 | const app = express()
45 |
46 | app.use(expressSitemapXml(getUrls, 'https://bitmidi.com'))
47 |
48 | async function getUrls () {
49 | return await getUrlsFromDatabase()
50 | }
51 | ```
52 |
53 | Remember to add a `Sitemap` line to `robots.txt` like this:
54 |
55 | ```
56 | Sitemap: https://bitmidi.com/sitemap.xml
57 | ```
58 |
59 | ## Usage (without Express)
60 |
61 | The package can also be used without the Express middleware.
62 |
63 | ```js
64 | const { buildSitemaps } = require('express-sitemap-xml')
65 |
66 | async function run () {
67 | const urls = ['/1', '/2', '/3']
68 | const sitemaps = await buildSitemaps(urls, 'https://bitmidi.com')
69 |
70 | console.log(Object.keys(sitemaps))
71 | // ['/sitemap.xml']
72 |
73 | console.log(sitemaps['/sitemap.xml'])
74 | // `
75 | //
76 | //
77 | // https://bitmidi.com/1
78 | // ${getTodayStr()}
79 | //
80 | //
81 | // https://bitmidi.com/2
82 | // ${getTodayStr()}
83 | //
84 | //
85 | // https://bitmidi.com/3
86 | // ${getTodayStr()}
87 | //
88 | // `
89 | })
90 | ```
91 |
92 | Remember to add a `Sitemap` line to `robots.txt` like this:
93 |
94 | ```
95 | Sitemap: https://bitmidi.com/sitemap.xml
96 | ```
97 |
98 | ## API
99 |
100 | ### `middleware = expressSitemapXml(getUrls, base)`
101 |
102 | Create a `sitemap.xml` middleware. Both arguments are required.
103 |
104 | The `getUrls` argument specifies an async function that resolves to an array of
105 | URLs to be included in the sitemap. Each URL in the array can either be an
106 | absolute or relative URL string like `'/1'`, or an object specifying additional
107 | options about the URL:
108 |
109 | ```js
110 | {
111 | url: '/1',
112 | lastMod: new Date('2000-02-02'), // optional (specify `true` for today's date)
113 | changeFreq: 'weekly' // optional
114 | }
115 | ```
116 |
117 | For more information about these options, see the [sitemap spec](https://www.sitemaps.org/protocol.html). Note that the `priority` option is not supported because [Google ignores it](https://twitter.com/methode/status/846796737750712320).
118 |
119 | The `getUrls` function is called at most once per 24 hours. The resulting
120 | sitemap(s) are cached to make repeated HTTP requests faster.
121 |
122 | The `base` argument specifies the base URL to be used in case any URLs are
123 | specified as relative URLs. The argument is also used if a sitemap index needs
124 | to be generated and sitemap locations need to be specified, e.g.
125 | `${base}/sitemap-0.xml` becomes `https://bitmidi.com/sitemap-0.xml`.
126 |
127 | ### `sitemaps = expressSitemapXml.buildSitemaps(urls, base)`
128 |
129 | Create an object where the keys are sitemap URLs to be served by the server and
130 | the values are strings of sitemap XML content. (This function does no caching.)
131 |
132 | The `urls` argument is an array of URLs to be included in the sitemap. Each URL
133 | in the array can either be an absolute or relative URL string like `'/1'`, or an
134 | object specifying additional options about the URL. See above for more info
135 | about the options.
136 |
137 | The `base` argument is the same as above.
138 |
139 | The return value is an object that looks like this:
140 |
141 | ```js
142 | {
143 | '/sitemap.xml': '...'
144 | }
145 | ```
146 |
147 | Or if multiple sitemaps are needed, then the return object looks like this:
148 |
149 | ```js
150 | {
151 | '/sitemap.xml': '...',
152 | '/sitemap-0.xml': '...',
153 | '/sitemap-1.xml': '...'
154 | }
155 | ```
156 |
157 | ## License
158 |
159 | MIT. Copyright (c) [Feross Aboukhadijeh](https://feross.org).
160 |
--------------------------------------------------------------------------------
/test/basic.js:
--------------------------------------------------------------------------------
1 | const test = require('tape')
2 | const { join } = require('path')
3 | const { buildSitemaps } = require('../')
4 | const { readFileSync } = require('fs')
5 | const { stripIndent } = require('common-tags')
6 |
7 | test('basic usage', t => {
8 | t.plan(2)
9 |
10 | const urls = ['/1', '/2', '/3']
11 |
12 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => {
13 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml']))
14 |
15 | t.equal(sitemaps['/sitemap.xml'], stripIndent`
16 |
17 |
18 |
19 | https://bitmidi.com/1
20 |
21 |
22 | https://bitmidi.com/2
23 |
24 |
25 | https://bitmidi.com/3
26 |
27 |
28 | `)
29 | })
30 | })
31 |
32 | test('nested base url', t => {
33 | t.plan(2)
34 |
35 | const urls = ['/sitemap-0.xml', '/sitemap-1.xml', '/sitemap-2.xml']
36 |
37 | buildSitemaps(urls, 'https://api.teslahunt.io/cars/sitemap').then(
38 | sitemaps => {
39 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml']))
40 |
41 | t.equal(
42 | sitemaps['/sitemap.xml'],
43 | stripIndent`
44 |
45 |
46 |
47 | https://api.teslahunt.io/cars/sitemap/sitemap-0.xml
48 |
49 |
50 | https://api.teslahunt.io/cars/sitemap/sitemap-1.xml
51 |
52 |
53 | https://api.teslahunt.io/cars/sitemap/sitemap-2.xml
54 |
55 |
56 | `
57 | )
58 | }
59 | )
60 | })
61 |
62 | test('nested base url with trailing slash', t => {
63 | t.plan(2)
64 |
65 | const urls = ['/sitemap-0.xml', '/sitemap-1.xml', '/sitemap-2.xml']
66 |
67 | buildSitemaps(urls, 'https://api.teslahunt.io/cars/sitemap/').then(
68 | sitemaps => {
69 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml']))
70 |
71 | t.equal(
72 | sitemaps['/sitemap.xml'],
73 | stripIndent`
74 |
75 |
76 |
77 | https://api.teslahunt.io/cars/sitemap/sitemap-0.xml
78 |
79 |
80 | https://api.teslahunt.io/cars/sitemap/sitemap-1.xml
81 |
82 |
83 | https://api.teslahunt.io/cars/sitemap/sitemap-2.xml
84 |
85 |
86 | `
87 | )
88 | }
89 | )
90 | })
91 |
92 | test('usage with all options', t => {
93 | t.plan(2)
94 |
95 | const urls = [
96 | {
97 | url: '/1',
98 | lastMod: '2000-01-01',
99 | changeFreq: 'daily'
100 | },
101 | {
102 | url: '/2',
103 | lastMod: new Date('2000-02-02'),
104 | changeFreq: 'weekly'
105 | },
106 | {
107 | url: '/3',
108 | lastMod: true
109 | },
110 | {
111 | url: '/4'
112 | },
113 | '/5'
114 | ]
115 |
116 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => {
117 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml']))
118 |
119 | t.equal(sitemaps['/sitemap.xml'], stripIndent`
120 |
121 |
122 |
123 | https://bitmidi.com/1
124 | 2000-01-01
125 | daily
126 |
127 |
128 | https://bitmidi.com/2
129 | 2000-02-02
130 | weekly
131 |
132 |
133 | https://bitmidi.com/3
134 | ${getTodayStr()}
135 |
136 |
137 | https://bitmidi.com/4
138 |
139 |
140 | https://bitmidi.com/5
141 |
142 |
143 | `)
144 | })
145 | })
146 |
147 | test('usage with images', t => {
148 | t.plan(2)
149 |
150 | const urls = [
151 | {
152 | url: '/1',
153 | image: '/1.png'
154 | },
155 | {
156 | url: '/2',
157 | image: ['/2.png', '/3.png']
158 | },
159 | ]
160 |
161 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => {
162 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml']))
163 |
164 | console.log(sitemaps['/sitemap.xml'])
165 |
166 | t.equal(sitemaps['/sitemap.xml'], stripIndent`
167 |
168 |
169 |
170 | https://bitmidi.com/1
171 |
172 | https://bitmidi.com/1.png
173 |
174 |
175 |
176 | https://bitmidi.com/2
177 |
178 | https://bitmidi.com/2.png
179 |
180 |
181 | https://bitmidi.com/3.png
182 |
183 |
184 |
185 | `)
186 | })
187 | })
188 |
189 | test('large test: use sitemap index for > 50,000 urls', t => {
190 | t.plan(4)
191 |
192 | const urls = []
193 | for (let i = 0; i < 60000; i++) {
194 | urls.push({
195 | url: `/${i}`,
196 | lastMod: true
197 | })
198 | }
199 |
200 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => {
201 | t.deepEqual(
202 | new Set(Object.keys(sitemaps)),
203 | new Set(['/sitemap.xml', '/sitemap-0.xml', '/sitemap-1.xml']))
204 |
205 | t.equal(
206 | sitemaps['/sitemap.xml'],
207 | readFileSync(join(__dirname, 'large-sitemap.xml'), 'utf8')
208 | .replace(/2018-07-15/g, getTodayStr())
209 | )
210 | t.equal(
211 | sitemaps['/sitemap-0.xml'],
212 | readFileSync(join(__dirname, 'large-sitemap-0.xml'), 'utf8')
213 | .replace(/2018-07-15/g, getTodayStr())
214 | )
215 | t.equal(
216 | sitemaps['/sitemap-1.xml'],
217 | readFileSync(join(__dirname, 'large-sitemap-1.xml'), 'utf8')
218 | .replace(/2018-07-15/g, getTodayStr())
219 | )
220 | })
221 | })
222 |
223 | /** Utility function implementations copied from index.js */
224 |
225 | function getTodayStr () {
226 | return dateToString(new Date())
227 | }
228 |
229 | function dateToString (date) {
230 | if (typeof date === 'string') return date
231 | return date.toISOString().split('T')[0]
232 | }
233 |
--------------------------------------------------------------------------------