├── .npmignore ├── .travis.yml ├── test ├── large-sitemap.xml └── basic.js ├── CHANGELOG.md ├── LICENSE ├── package.json ├── index.js └── README.md /.npmignore: -------------------------------------------------------------------------------- 1 | .travis.yml 2 | test/ 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - lts/* 4 | -------------------------------------------------------------------------------- /test/large-sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | https://bitmidi.com/sitemap-0.xml 5 | 2018-07-15 6 | 7 | 8 | https://bitmidi.com/sitemap-1.xml 9 | 2018-07-15 10 | 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 
4 | 5 | ## [3.1.0](https://github.com/feross/express-sitemap-xml/compare/v3.0.0...v3.1.0) (2024-04-19) 6 | 7 | 8 | ### Features 9 | 10 | * add image support ([5245d85](https://github.com/feross/express-sitemap-xml/commit/5245d851b08e929d27e1bc3cf9dbe2a09100072e)) 11 | 12 | 13 | ### Bug Fixes 14 | 15 | * base url can be nested ([28e62df](https://github.com/feross/express-sitemap-xml/commit/28e62dfebac163bf1938ccfd35bdab1c87f0a8aa)) 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Feross Aboukhadijeh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "express-sitemap-xml", 3 | "description": "Express middleware to serve `sitemap.xml` from a list of URLs", 4 | "version": "3.1.0", 5 | "author": { 6 | "name": "Feross Aboukhadijeh", 7 | "email": "feross@feross.org", 8 | "url": "https://feross.org" 9 | }, 10 | "bugs": { 11 | "url": "https://github.com/feross/express-sitemap-xml/issues" 12 | }, 13 | "dependencies": { 14 | "p-memoize": "^4.0.1", 15 | "xmlbuilder": "^15.1.1" 16 | }, 17 | "devDependencies": { 18 | "common-tags": "^1.8.0", 19 | "standard": "*", 20 | "tape": "^5.0.1" 21 | }, 22 | "homepage": "https://github.com/feross/express-sitemap-xml", 23 | "keywords": [ 24 | "express", 25 | "google", 26 | "serve sitemap", 27 | "serve sitemap.xml", 28 | "site map", 29 | "site map xml", 30 | "sitemap", 31 | "sitemap generator", 32 | "sitemap xml", 33 | "sitemap.xml", 34 | "sitemaps", 35 | "xml" 36 | ], 37 | "license": "MIT", 38 | "main": "index.js", 39 | "repository": { 40 | "type": "git", 41 | "url": "git://github.com/feross/express-sitemap-xml.git" 42 | }, 43 | "scripts": { 44 | "test": "standard && tape test/**/*.js" 45 | }, 46 | "funding": [ 47 | { 48 | "type": "github", 49 | "url": "https://github.com/sponsors/feross" 50 | }, 51 | { 52 | "type": "patreon", 53 | "url": "https://www.patreon.com/feross" 54 | }, 55 | { 56 | "type": "consulting", 57 | "url": "https://feross.org/support" 58 | } 59 | ] 60 | } 61 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /*! express-sitemap-xml. MIT License. 
Feross Aboukhadijeh */ 2 | module.exports = expressSitemapXml 3 | module.exports.buildSitemaps = buildSitemaps 4 | 5 | const builder = require('xmlbuilder') 6 | const pMemoize = require('p-memoize') 7 | const { URL } = require('url') // TODO: Remove once Node 8 support is dropped 8 | 9 | const MAX_SITEMAP_LENGTH = 50 * 1000 // Max URLs in a sitemap (defined by spec) 10 | const SITEMAP_URL_RE = /\/sitemap(-\d+)?\.xml/ // Sitemap url pattern 11 | const SITEMAP_MAX_AGE = 24 * 60 * 60 * 1000 // Cache sitemaps for 24 hours 12 | 13 | const TRAILING_SLASH_RE = /\/+$/ 14 | 15 | function removeTrailingSlash (str) { 16 | return str.replace(TRAILING_SLASH_RE, '') 17 | } 18 | 19 | function expressSitemapXml ( 20 | getUrls, 21 | base, 22 | { size = MAX_SITEMAP_LENGTH, maxAge = SITEMAP_MAX_AGE } = {} 23 | ) { 24 | if (typeof getUrls !== 'function') { 25 | throw new Error('Argument `getUrls` must be a function') 26 | } 27 | if (typeof base !== 'string') { 28 | throw new Error('Argument `base` must be a string') 29 | } 30 | 31 | async function loadSitemaps () { 32 | const urls = await getUrls() 33 | if (!Array.isArray(urls)) { 34 | throw new Error('async function `getUrls` must resolve to an Array') 35 | } 36 | return buildSitemaps(urls, base, size) 37 | } 38 | 39 | const memoizedLoad = pMemoize(loadSitemaps, { maxAge }) 40 | 41 | return async (req, res, next) => { 42 | const isSitemapUrl = SITEMAP_URL_RE.test(req.url) 43 | if (isSitemapUrl) { 44 | const sitemaps = await memoizedLoad() 45 | if (sitemaps[req.url]) { 46 | res.setHeader('Content-Type', 'application/xml') 47 | return res.status(200).send(sitemaps[req.url]) 48 | } 49 | } 50 | next() 51 | } 52 | } 53 | 54 | async function buildSitemaps (urls, base, size = MAX_SITEMAP_LENGTH) { 55 | const sitemaps = Object.create(null) 56 | 57 | if (urls.length <= size) { 58 | // If there is only one sitemap (i.e. 
there are less than 50,000 URLs) 59 | // then serve it directly at /sitemap.xml 60 | sitemaps['/sitemap.xml'] = buildSitemap(urls, base) 61 | } else { 62 | // Otherwise, serve a sitemap index at /sitemap.xml and sitemaps at 63 | // /sitemap-0.xml, /sitemap-1.xml, etc. 64 | for (let i = 0; i * size < urls.length; i++) { 65 | const start = i * size 66 | const selectedUrls = urls.slice(start, start + size) 67 | sitemaps[`/sitemap-${i}.xml`] = buildSitemap(selectedUrls, base) 68 | } 69 | sitemaps['/sitemap.xml'] = buildSitemapIndex(sitemaps, base) 70 | } 71 | 72 | return sitemaps 73 | } 74 | 75 | function buildSitemapIndex (sitemaps, base) { 76 | const sitemapObjs = Object.keys(sitemaps).map(sitemapUrl => { 77 | return { 78 | loc: toAbsolute(sitemapUrl, base), 79 | lastmod: getTodayStr() 80 | } 81 | }) 82 | 83 | const sitemapIndexObj = { 84 | sitemapindex: { 85 | '@xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9', 86 | sitemap: sitemapObjs 87 | } 88 | } 89 | 90 | return buildXml(sitemapIndexObj) 91 | } 92 | 93 | function buildSitemap (urls, base) { 94 | const urlObjs = urls.map(url => { 95 | if (typeof url === 'string') { 96 | return { 97 | loc: toAbsolute(url, base) 98 | } 99 | } 100 | 101 | if (typeof url.url !== 'string') { 102 | throw new Error( 103 | `Invalid sitemap url object, missing 'url' property: ${JSON.stringify( 104 | url 105 | )}` 106 | ) 107 | } 108 | 109 | const urlObj = { 110 | loc: toAbsolute(url.url, base) 111 | } 112 | if (url.lastMod === true) { 113 | urlObj.lastmod = getTodayStr() 114 | } else if (typeof url.lastMod === 'string' || url.lastMod instanceof Date) { 115 | urlObj.lastmod = dateToString(url.lastMod) 116 | } 117 | 118 | if (typeof url.changeFreq === 'string') { 119 | urlObj.changefreq = url.changeFreq 120 | } 121 | 122 | if (typeof url.image === 'string') { 123 | urlObj['image:image'] = { 124 | 'image:loc': toAbsolute(url.image, base) 125 | } 126 | } else if (Array.isArray(url.image)) { 127 | urlObj['image:image'] = 
url.image.map(image => { 128 | return { 129 | 'image:loc': toAbsolute(image, base) 130 | } 131 | }) 132 | } 133 | 134 | return urlObj 135 | }) 136 | 137 | const sitemapObj = { 138 | urlset: { 139 | '@xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9', 140 | url: urlObjs 141 | } 142 | } 143 | 144 | return buildXml(sitemapObj) 145 | } 146 | 147 | function buildXml (obj) { 148 | const opts = { 149 | encoding: 'utf-8' 150 | } 151 | const xml = builder.create(obj, opts) 152 | return xml.end({ pretty: true, allowEmpty: false }) 153 | } 154 | 155 | function getTodayStr () { 156 | return dateToString(new Date()) 157 | } 158 | 159 | function dateToString (date) { 160 | if (typeof date === 'string') return date 161 | return date.toISOString().split('T')[0] 162 | } 163 | 164 | function toAbsolute (url, base) { 165 | if (!url.startsWith('/')) return url 166 | const { origin, pathname } = new URL(base) 167 | const relative = pathname === '/' ? url : removeTrailingSlash(pathname) + url 168 | return new URL(relative, origin).href 169 | } 170 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # express-sitemap-xml [![travis][travis-image]][travis-url] [![npm][npm-image]][npm-url] [![downloads][downloads-image]][downloads-url] [![javascript style guide][standard-image]][standard-url] 2 | 3 | [travis-image]: https://img.shields.io/travis/feross/express-sitemap-xml/master.svg 4 | [travis-url]: https://travis-ci.org/feross/express-sitemap-xml 5 | [npm-image]: https://img.shields.io/npm/v/express-sitemap-xml.svg 6 | [npm-url]: https://npmjs.org/package/express-sitemap-xml 7 | [downloads-image]: https://img.shields.io/npm/dm/express-sitemap-xml.svg 8 | [downloads-url]: https://npmjs.org/package/express-sitemap-xml 9 | [standard-image]: https://img.shields.io/badge/code_style-standard-brightgreen.svg 10 | [standard-url]: https://standardjs.com 11 | 12 | ### 
Express middleware to serve [`sitemap.xml`](https://en.wikipedia.org/wiki/Sitemaps) from a list of URLs 13 | 14 | Create an Express middleware that serves `sitemap.xml` from a list of URLs. 15 | 16 | This package automatically handles sitemaps with more than 50,000 URLs. In these 17 | cases, multiple sitemap files will be generated along with a "sitemap index" to 18 | comply with the [sitemap spec](https://www.sitemaps.org/protocol.html) and 19 | requirements from search engines like Google. 20 | 21 | If only one sitemap file is needed (i.e. there are 50,000 URLs or fewer) then 22 | it is served directly at `/sitemap.xml`. Otherwise, a sitemap index is served at 23 | `/sitemap.xml` and sitemaps at `/sitemap-0.xml`, `/sitemap-1.xml`, etc. 24 | 25 | ## Install 26 | 27 | ``` 28 | npm install express-sitemap-xml 29 | ``` 30 | 31 | ## Demo 32 | 33 | You can see this package in action on [BitMidi](https://bitmidi.com), a site for 34 | listening to your favorite MIDI files. 35 | 36 | ## Usage (with Express) 37 | 38 | The easiest way to use this package is with the Express middleware. 39 | 40 | ```js 41 | const express = require('express') 42 | const expressSitemapXml = require('express-sitemap-xml') 43 | 44 | const app = express() 45 | 46 | app.use(expressSitemapXml(getUrls, 'https://bitmidi.com')) 47 | 48 | async function getUrls () { 49 | return await getUrlsFromDatabase() 50 | } 51 | ``` 52 | 53 | Remember to add a `Sitemap` line to `robots.txt` like this: 54 | 55 | ``` 56 | Sitemap: https://bitmidi.com/sitemap.xml 57 | ``` 58 | 59 | ## Usage (without Express) 60 | 61 | The package can also be used without the Express middleware. 
62 | 63 | ```js 64 | const { buildSitemaps } = require('express-sitemap-xml') 65 | 66 | async function run () { 67 | const urls = ['/1', '/2', '/3'].map(url => ({ url, lastMod: true })) 68 | const sitemaps = await buildSitemaps(urls, 'https://bitmidi.com') 69 | 70 | console.log(Object.keys(sitemaps)) 71 | // ['/sitemap.xml'] 72 | 73 | console.log(sitemaps['/sitemap.xml']) 74 | // `<?xml version="1.0" encoding="utf-8"?> 75 | // <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 76 | // <url> 77 | // <loc>https://bitmidi.com/1</loc> 78 | // <lastmod>${getTodayStr()}</lastmod> 79 | // </url> 80 | // <url> 81 | // <loc>https://bitmidi.com/2</loc> 82 | // <lastmod>${getTodayStr()}</lastmod> 83 | // </url> 84 | // <url> 85 | // <loc>https://bitmidi.com/3</loc> 86 | // <lastmod>${getTodayStr()}</lastmod> 87 | // </url> 88 | // </urlset>` 89 | } 90 | ``` 91 | 92 | Remember to add a `Sitemap` line to `robots.txt` like this: 93 | 94 | ``` 95 | Sitemap: https://bitmidi.com/sitemap.xml 96 | ``` 97 | 98 | ## API 99 | 100 | ### `middleware = expressSitemapXml(getUrls, base)` 101 | 102 | Create a `sitemap.xml` middleware. Both arguments are required. 103 | 104 | The `getUrls` argument specifies an async function that resolves to an array of 105 | URLs to be included in the sitemap. Each URL in the array can either be an 106 | absolute or relative URL string like `'/1'`, or an object specifying additional 107 | options about the URL: 108 | 109 | ```js 110 | { 111 | url: '/1', 112 | lastMod: new Date('2000-02-02'), // optional (specify `true` for today's date) 113 | changeFreq: 'weekly' // optional 114 | } 115 | ``` 116 | 117 | For more information about these options, see the [sitemap spec](https://www.sitemaps.org/protocol.html). Note that the `priority` option is not supported because [Google ignores it](https://twitter.com/methode/status/846796737750712320). 118 | 119 | The `getUrls` function is called at most once per 24 hours. The resulting 120 | sitemap(s) are cached to make repeated HTTP requests faster. 121 | 122 | The `base` argument specifies the base URL to be used in case any URLs are 123 | specified as relative URLs. 
The argument is also used if a sitemap index needs 124 | to be generated and sitemap locations need to be specified, e.g. 125 | `${base}/sitemap-0.xml` becomes `https://bitmidi.com/sitemap-0.xml`. 126 | 127 | ### `sitemaps = await expressSitemapXml.buildSitemaps(urls, base)` 128 | 129 | Create an object where the keys are sitemap URLs to be served by the server and 130 | the values are strings of sitemap XML content. (This function does no caching.) 131 | 132 | The `urls` argument is an array of URLs to be included in the sitemap. Each URL 133 | in the array can either be an absolute or relative URL string like `'/1'`, or an 134 | object specifying additional options about the URL. See above for more info 135 | about the options. 136 | 137 | The `base` argument is the same as above. 138 | 139 | The return value is a promise that resolves to an object that looks like this: 140 | 141 | ```js 142 | { 143 | '/sitemap.xml': '...' 144 | } 145 | ``` 146 | 147 | Or if multiple sitemaps are needed, then the resolved object looks like this: 148 | 149 | ```js 150 | { 151 | '/sitemap.xml': '...', 152 | '/sitemap-0.xml': '...', 153 | '/sitemap-1.xml': '...' 154 | } 155 | ``` 156 | 157 | ## License 158 | 159 | MIT. Copyright (c) [Feross Aboukhadijeh](https://feross.org). 
160 | -------------------------------------------------------------------------------- /test/basic.js: -------------------------------------------------------------------------------- 1 | const test = require('tape') 2 | const { join } = require('path') 3 | const { buildSitemaps } = require('../') 4 | const { readFileSync } = require('fs') 5 | const { stripIndent } = require('common-tags') 6 | 7 | test('basic usage', t => { 8 | t.plan(2) 9 | 10 | const urls = ['/1', '/2', '/3'] 11 | 12 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => { 13 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml'])) 14 | 15 | t.equal(sitemaps['/sitemap.xml'], stripIndent` 16 | 17 | 18 | 19 | https://bitmidi.com/1 20 | 21 | 22 | https://bitmidi.com/2 23 | 24 | 25 | https://bitmidi.com/3 26 | 27 | 28 | `) 29 | }) 30 | }) 31 | 32 | test('nested base url', t => { 33 | t.plan(2) 34 | 35 | const urls = ['/sitemap-0.xml', '/sitemap-1.xml', '/sitemap-2.xml'] 36 | 37 | buildSitemaps(urls, 'https://api.teslahunt.io/cars/sitemap').then( 38 | sitemaps => { 39 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml'])) 40 | 41 | t.equal( 42 | sitemaps['/sitemap.xml'], 43 | stripIndent` 44 | 45 | 46 | 47 | https://api.teslahunt.io/cars/sitemap/sitemap-0.xml 48 | 49 | 50 | https://api.teslahunt.io/cars/sitemap/sitemap-1.xml 51 | 52 | 53 | https://api.teslahunt.io/cars/sitemap/sitemap-2.xml 54 | 55 | 56 | ` 57 | ) 58 | } 59 | ) 60 | }) 61 | 62 | test('nested base url with trailing slash', t => { 63 | t.plan(2) 64 | 65 | const urls = ['/sitemap-0.xml', '/sitemap-1.xml', '/sitemap-2.xml'] 66 | 67 | buildSitemaps(urls, 'https://api.teslahunt.io/cars/sitemap/').then( 68 | sitemaps => { 69 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml'])) 70 | 71 | t.equal( 72 | sitemaps['/sitemap.xml'], 73 | stripIndent` 74 | 75 | 76 | 77 | https://api.teslahunt.io/cars/sitemap/sitemap-0.xml 78 | 79 | 80 | https://api.teslahunt.io/cars/sitemap/sitemap-1.xml 
81 | 82 | 83 | https://api.teslahunt.io/cars/sitemap/sitemap-2.xml 84 | 85 | 86 | ` 87 | ) 88 | } 89 | ) 90 | }) 91 | 92 | test('usage with all options', t => { 93 | t.plan(2) 94 | 95 | const urls = [ 96 | { 97 | url: '/1', 98 | lastMod: '2000-01-01', 99 | changeFreq: 'daily' 100 | }, 101 | { 102 | url: '/2', 103 | lastMod: new Date('2000-02-02'), 104 | changeFreq: 'weekly' 105 | }, 106 | { 107 | url: '/3', 108 | lastMod: true 109 | }, 110 | { 111 | url: '/4' 112 | }, 113 | '/5' 114 | ] 115 | 116 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => { 117 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml'])) 118 | 119 | t.equal(sitemaps['/sitemap.xml'], stripIndent` 120 | 121 | 122 | 123 | https://bitmidi.com/1 124 | 2000-01-01 125 | daily 126 | 127 | 128 | https://bitmidi.com/2 129 | 2000-02-02 130 | weekly 131 | 132 | 133 | https://bitmidi.com/3 134 | ${getTodayStr()} 135 | 136 | 137 | https://bitmidi.com/4 138 | 139 | 140 | https://bitmidi.com/5 141 | 142 | 143 | `) 144 | }) 145 | }) 146 | 147 | test('usage with images', t => { 148 | t.plan(2) 149 | 150 | const urls = [ 151 | { 152 | url: '/1', 153 | image: '/1.png' 154 | }, 155 | { 156 | url: '/2', 157 | image: ['/2.png', '/3.png'] 158 | }, 159 | ] 160 | 161 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => { 162 | t.deepEqual(new Set(Object.keys(sitemaps)), new Set(['/sitemap.xml'])) 163 | 164 | console.log(sitemaps['/sitemap.xml']) 165 | 166 | t.equal(sitemaps['/sitemap.xml'], stripIndent` 167 | 168 | 169 | 170 | https://bitmidi.com/1 171 | 172 | https://bitmidi.com/1.png 173 | 174 | 175 | 176 | https://bitmidi.com/2 177 | 178 | https://bitmidi.com/2.png 179 | 180 | 181 | https://bitmidi.com/3.png 182 | 183 | 184 | 185 | `) 186 | }) 187 | }) 188 | 189 | test('large test: use sitemap index for > 50,000 urls', t => { 190 | t.plan(4) 191 | 192 | const urls = [] 193 | for (let i = 0; i < 60000; i++) { 194 | urls.push({ 195 | url: `/${i}`, 196 | lastMod: true 197 | }) 198 | } 
199 | 200 | buildSitemaps(urls, 'https://bitmidi.com').then(sitemaps => { 201 | t.deepEqual( 202 | new Set(Object.keys(sitemaps)), 203 | new Set(['/sitemap.xml', '/sitemap-0.xml', '/sitemap-1.xml'])) 204 | 205 | t.equal( 206 | sitemaps['/sitemap.xml'], 207 | readFileSync(join(__dirname, 'large-sitemap.xml'), 'utf8') 208 | .replace(/2018-07-15/g, getTodayStr()) 209 | ) 210 | t.equal( 211 | sitemaps['/sitemap-0.xml'], 212 | readFileSync(join(__dirname, 'large-sitemap-0.xml'), 'utf8') 213 | .replace(/2018-07-15/g, getTodayStr()) 214 | ) 215 | t.equal( 216 | sitemaps['/sitemap-1.xml'], 217 | readFileSync(join(__dirname, 'large-sitemap-1.xml'), 'utf8') 218 | .replace(/2018-07-15/g, getTodayStr()) 219 | ) 220 | }) 221 | }) 222 | 223 | /** Utility function implementations copied from index.js */ 224 | 225 | function getTodayStr () { 226 | return dateToString(new Date()) 227 | } 228 | 229 | function dateToString (date) { 230 | if (typeof date === 'string') return date 231 | return date.toISOString().split('T')[0] 232 | } 233 | --------------------------------------------------------------------------------