├── src
│   ├── helpers
│   │   ├── stringifyURL.js
│   │   ├── escapeUnsafe.js
│   │   ├── getCurrentDateTime.js
│   │   ├── extendFilename.js
│   │   ├── validChangeFreq.js
│   │   └── __tests__
│   │       ├── getCurrentDateTime.js
│   │       ├── validChangeFreq.js
│   │       ├── stringifyURL.js
│   │       ├── extendFilename.js
│   │       └── escapeUnsafe.js
│   ├── __tests__
│   │   ├── discoverResources.js
│   │   ├── index.js
│   │   ├── createCrawler.js
│   │   ├── createSitemapIndex.js
│   │   ├── SitemapStream.js
│   │   └── SitemapRotator.js
│   ├── createSitemapIndex.js
│   ├── SitemapStream.js
│   ├── discoverResources.js
│   ├── SitemapRotator.js
│   ├── createCrawler.js
│   └── index.js
├── .travis.yml
├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── workflows
│       └── nodejs.yml
├── .editorconfig
├── .gitignore
├── LICENSE
├── package.json
└── README.md

/src/helpers/stringifyURL.js:
--------------------------------------------------------------------------------
1 | module.exports = parsed =>
2 |   `${parsed.protocol}://${parsed.host}${parsed.uriPath}`;
3 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | 
3 | node_js: 10
4 | 
5 | deploy:
6 |   provider: script
7 |   script:
8 |     - 'npx semantic-release'
9 |   skip_cleanup: true
10 | 
--------------------------------------------------------------------------------
/src/helpers/escapeUnsafe.js:
--------------------------------------------------------------------------------
1 | module.exports = unsafe =>
2 |   unsafe
3 |     .replace(/&/g, '&amp;')
4 |     .replace(/</g, '&lt;')
5 |     .replace(/>/g, '&gt;')
6 |     .replace(/"/g, '&quot;')
7 |     .replace(/'/g, '&apos;');
8 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | **Do you want to request a *feature* or report a *bug*?**
2 | 
3 | **What is the current behavior?**
4 | 
5 | **If the current behavior is a bug, please provide the steps to reproduce.**
6 | 
7 | **What is the expected behavior?**
8 | 
9 | 
--------------------------------------------------------------------------------
/src/__tests__/discoverResources.js:
--------------------------------------------------------------------------------
1 | const discoverResources = require('../discoverResources');
2 | 
3 | describe('#discoverResources', () => {
4 |   test('should be a function', () => {
5 |     expect(discoverResources).toBeInstanceOf(Function);
6 |   });
7 | });
8 | 
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # editorconfig.org
2 | 
3 | root = true
4 | 
5 | [*]
6 | indent_style = space
7 | indent_size = 2
8 | end_of_line = lf
9 | charset = utf-8
10 | trim_trailing_whitespace = true
11 | insert_final_newline = true
12 | 
13 | [*.md]
14 | trim_trailing_whitespace = false
--------------------------------------------------------------------------------
/src/helpers/getCurrentDateTime.js:
--------------------------------------------------------------------------------
1 | module.exports = () => {
2 |   const now = new Date();
3 |   const year = now.getFullYear();
4 |   const month =
5 |     now.getMonth() + 1 < 10 ? `0${now.getMonth() + 1}` : now.getMonth() + 1;
6 |   const date = now.getDate() < 10 ? `0${now.getDate()}` : now.getDate();
7 |   return `${year}-${month}-${date}`;
8 | };
9 | 
--------------------------------------------------------------------------------
/src/helpers/extendFilename.js:
--------------------------------------------------------------------------------
1 | /* eslint no-bitwise:0 */
2 | 
3 | module.exports = (fpath, str) => {
4 |   const ext = fpath.slice(((fpath.lastIndexOf('.') - 1) >>> 0) + 2); // ">>> 0" maps lastIndexOf's -1 (no dot) to a huge index, so extensionless paths yield ''
5 | 
6 |   let newFilename;
7 | 
8 |   if (ext) {
9 |     newFilename = fpath.replace(`.${ext}`, `${str}.${ext}`);
10 |   } else {
11 |     newFilename = `${fpath}${str}`;
12 |   }
13 | 
14 |   return newFilename;
15 | };
16 | 
--------------------------------------------------------------------------------
/src/helpers/validChangeFreq.js:
--------------------------------------------------------------------------------
1 | module.exports = desiredChangeFreq => {
2 |   const acceptedChangeFreqs = [
3 |     'always',
4 |     'hourly',
5 |     'daily',
6 |     'weekly',
7 |     'monthly',
8 |     'yearly',
9 |     'never',
10 |   ];
11 |   if (acceptedChangeFreqs.indexOf(desiredChangeFreq) === -1) {
12 |     // eslint-disable-next-line
13 |     console.warn('Desired change frequency is not a valid type. Ignoring.');
14 |     return '';
15 |   }
16 |   return desiredChangeFreq;
17 | };
18 | 
--------------------------------------------------------------------------------
/src/helpers/__tests__/getCurrentDateTime.js:
--------------------------------------------------------------------------------
1 | const getCurrentDateTime = require('../getCurrentDateTime');
2 | 
3 | describe('#getCurrentDateTime', () => {
4 |   test('should be a function', () => {
5 |     expect(getCurrentDateTime).toBeInstanceOf(Function);
6 |   });
7 | 
8 |   test('should return a string', () => {
9 |     expect(typeof getCurrentDateTime()).toBe('string');
10 |   });
11 | 
12 |   test('should match standard date string', () => {
13 |     expect(getCurrentDateTime()).toMatch(/\d{4}-\d{2}-\d{2}/);
14 |   });
15 | });
16 | 
--------------------------------------------------------------------------------
/src/helpers/__tests__/validChangeFreq.js:
--------------------------------------------------------------------------------
1 | const validChangeFreq = require('../validChangeFreq');
2 | 
3 | describe('#validChangeFreq', () => {
4 |   test('should be a function', () => {
5 |     expect(validChangeFreq).toBeInstanceOf(Function);
6 |   });
7 | 
8 |   test('should return string when valid', () => {
9 |     expect(typeof validChangeFreq('daily')).toBe('string');
10 |   });
11 | 
12 |   test('should return empty string when invalid', () => {
13 |     const changeFreq = validChangeFreq('invalid');
14 |     expect(typeof changeFreq).toBe('string');
15 |     expect(changeFreq).toBe('');
16 |   });
17 | });
18 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | 
5 | # Runtime data
6 | pids
7 | *.pid
8 | *.seed
9 | 
10 | # Directory for instrumented libs generated by jscoverage/JSCover
11 | lib-cov
12 | 
13 | # Coverage directory used by tools like istanbul
14 | coverage
15 | 
16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
17 | .grunt
18 | 
19 | # node-waf configuration
20 | .lock-wscript
21 | 
22 | # Compiled binary addons (http://nodejs.org/api/addons.html)
23 | build/Release
24 | 
25 | # Dependency directory
26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
27 | node_modules
28 | 
--------------------------------------------------------------------------------
/src/__tests__/index.js:
--------------------------------------------------------------------------------
1 | const SitemapGenerator = require('../');
2 | 
3 | describe('#SitemapGenerator', () => {
4 |   let gen;
5 | 
6 |   beforeEach(() => {
7 |     gen = SitemapGenerator('http://foo.bar');
8 |   });
9 | 
10 |   test('should be a function', () => {
11 |     expect(SitemapGenerator).toBeInstanceOf(Function);
12 |   });
13 | 
14 |   test('should have method start', () => {
15 |     expect(gen).toHaveProperty('start');
16 |   });
17 | 
18 |   test('should have method stop', () => {
19 |     expect(gen).toHaveProperty('stop');
20 |   });
21 | 
22 |   test('should have method queueURL', () => {
23 |     expect(gen).toHaveProperty('queueURL');
24 |   });
25 | });
26 | 
--------------------------------------------------------------------------------
/src/helpers/__tests__/stringifyURL.js:
--------------------------------------------------------------------------------
1 | const stringifyURL = require('../stringifyURL');
2 | 
3 | describe('#stringifyURL', () => {
4 |   const url = {
5 |     protocol: 'http',
6 |     host: 'example.com',
7 |     uriPath: '/test',
8 |   };
9 | 
10 |   test('should be a function', () => {
11 |     expect(stringifyURL).toBeInstanceOf(Function);
12 |   });
13 | 
14 |   test('should return a string', () => {
15 |     const str = stringifyURL(url);
16 | 
17 |     expect(typeof str).toBe('string');
18 |   });
19 | 
20 |   test('should create valid URL string', () => {
21 |     const str = stringifyURL(url);
22 | 
23 |     expect(str).toBe('http://example.com/test');
24 |   });
25 | });
26 | 
--------------------------------------------------------------------------------
/src/createSitemapIndex.js:
--------------------------------------------------------------------------------
1 | const extendFilename = require('./helpers/extendFilename');
2 | 
3 | module.exports = (url, filename, sitemapCount) => {
4 |   let sitemapIndex =
5 |     '<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
6 | 
7 |   for (let i = 1; i <= sitemapCount; i += 1) {
8 |     // generate sitemap part url
9 |     const newFilename = extendFilename(filename, `_part${i}`);
10 | 
11 |     const sitemapUrl = `${url.replace(/\/$/, '')}/${newFilename}`;
12 |     sitemapIndex += `\n  <sitemap>\n    <loc>${sitemapUrl}</loc>\n  </sitemap>`;
13 |   }
14 |   sitemapIndex += '\n</sitemapindex>';
15 | 
16 |   return sitemapIndex;
17 | };
18 | 
--------------------------------------------------------------------------------
/src/__tests__/createCrawler.js:
--------------------------------------------------------------------------------
1 | const createCrawler = require('../createCrawler');
2 | const Crawler = require('simplecrawler');
3 | const parse = require('url-parse');
4 | 
5 | describe('#createCrawler', () => {
6 |   test('should export a function', () => {
7 |     expect(createCrawler).toBeInstanceOf(Function);
8 |   });
9 | 
10 |   test('should return crawler instance', () => {
11 |     const crawler = createCrawler(parse('http://example.com'));
12 |     expect(crawler).toBeInstanceOf(Crawler);
13 |   });
14 | 
15 |   test('should apply options to crawler', () => {
16 |     const options = {
17 |       maxDepth: 2,
18 |     };
19 |     const crawler = createCrawler(parse('http://example.com'), options);
20 |     expect(crawler).toHaveProperty('maxDepth', 2);
21 |   });
22 | });
23 | 
--------------------------------------------------------------------------------
/src/helpers/__tests__/extendFilename.js:
--------------------------------------------------------------------------------
1 | const extendFilename = require('../extendFilename');
2 | 
3 | describe('#extendFilename', () => {
4 |   test('should be a function', () => {
5 |     expect(extendFilename).toBeInstanceOf(Function);
6 |   });
7 | 
8 |   test('should return a string', () => {
9 |     const newFilename = extendFilename('sitemap.xml', '_part1');
10 | 
11 |     expect(typeof newFilename).toBe('string');
12 |   });
13 | 
14 |   test('should extend filename with string', () => {
15 |     const newFilename = extendFilename('sitemap.xml', '_part1');
16 | 
17 |     expect(newFilename).toBe('sitemap_part1.xml');
18 |   });
19 | 
20 |   test('should extend filenames without extension', () => {
21 |     const newFilename = extendFilename('sitemap', '_part1');
22 | 
23 |     expect(newFilename).toBe('sitemap_part1');
24 |   });
25 | });
26 | 
--------------------------------------------------------------------------------
/src/__tests__/createSitemapIndex.js:
--------------------------------------------------------------------------------
1 | const createSitemapIndex = require('../createSitemapIndex');
2 | 
3 | describe('#createSitemapIndex', () => {
4 |   const url = 'http://example.com';
5 |   const filename = 'sitemap.xml';
6 |   const count = 2;
7 | 
8 |   test('should be a function', () => {
9 |     expect(createSitemapIndex).toBeInstanceOf(Function);
10 |   });
11 | 
12 |   test('should return string', () => {
13 |     const sitemapIndex = createSitemapIndex(url, filename, count);
14 |     expect(typeof sitemapIndex).toBe('string');
15 |   });
16 | 
17 |   test('should contain sitemap part url', () => {
18 |     const sitemapIndex = createSitemapIndex(url, filename, count);
19 |     const regex = new RegExp(
20 |       `${url.replace(/\/$/, '')}/sitemap_part${count}.xml`
21 |     );
22 |     expect(sitemapIndex).toMatch(regex);
23 |   });
24 | });
25 | 
--------------------------------------------------------------------------------
/.github/workflows/nodejs.yml:
--------------------------------------------------------------------------------
1 | # This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
3 | 
4 | name: Node.js CI
5 | 
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     strategy:
18 |       matrix:
19 |         node-version: [10.x, 12.x]
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v2
23 |     - name: Use Node.js ${{ matrix.node-version }}
24 |       uses: actions/setup-node@v1
25 |       with:
26 |         node-version: ${{ matrix.node-version }}
27 |     - run: npm ci
28 |     - run: npm run build --if-present
29 |     - run: npm test
30 |       env:
31 |         CI: true
32 | 
--------------------------------------------------------------------------------
/src/__tests__/SitemapStream.js:
--------------------------------------------------------------------------------
1 | const SitemapStream = require('../SitemapStream');
2 | 
3 | describe('#SitemapStream', () => {
4 |   const stream = SitemapStream();
5 | 
6 |   test('should be a function', () => {
7 |     expect(SitemapStream).toBeInstanceOf(Function);
8 |   });
9 | 
10 |   describe('#getPath', () => {
11 |     test('should have getPath method', () => {
12 |       expect(stream).toHaveProperty('getPath');
13 |     });
14 | 
15 |     test('should return path string', () => {
16 |       const path = stream.getPath();
17 |       expect(typeof path).toBe('string');
18 |     });
19 |   });
20 | 
21 |   describe('#write', () => {
22 |     test('should have write method', () => {
23 |       expect(stream).toHaveProperty('write');
24 |     });
25 |   });
26 | 
27 |   describe('#end', () => {
28 |     test('should have end method', () => {
29 |       expect(stream).toHaveProperty('end');
30 |     });
31 |   });
32 | });
33 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2015 Lars Graubner
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 
--------------------------------------------------------------------------------
/src/SitemapStream.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 | const rand = require('crypto-random-string');
3 | const os = require('os');
4 | const fs = require('fs');
5 | const escapeUnsafe = require('./helpers/escapeUnsafe');
6 | 
7 | module.exports = function SitemapStream() {
8 |   const tmpPath = path.join(os.tmpdir(), `sitemap_${rand(10)}`);
9 |   const stream = fs.createWriteStream(tmpPath);
10 | 
11 |   stream.write('<?xml version="1.0" encoding="UTF-8"?>');
12 |   stream.write(
13 |     '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
14 |   );
15 | 
16 |   const getPath = () => tmpPath;
17 | 
18 |   const write = (url, currentDateTime, changeFreq, priority) => {
19 |     const escapedUrl = escapeUnsafe(url);
20 |     stream.write('\n  <url>\n');
21 |     stream.write(`    <loc>${escapedUrl}</loc>\n`);
22 |     if (currentDateTime) {
23 |       stream.write(`    <lastmod>${currentDateTime}</lastmod>\n`);
24 |     }
25 |     if (changeFreq) {
26 |       stream.write(`    <changefreq>${changeFreq}</changefreq>\n`);
27 |     }
28 |     if (priority) {
29 |       stream.write(`    <priority>${priority}</priority>\n`);
30 |     }
31 |     stream.write('  </url>');
32 |   };
33 | 
34 |   const end = () => {
35 |     stream.write('\n</urlset>');
36 |     stream.end();
37 |   };
38 | 
39 |   return {
40 |     getPath,
41 |     write,
42 |     end,
43 |   };
44 | };
45 | 
--------------------------------------------------------------------------------
/src/__tests__/SitemapRotator.js:
--------------------------------------------------------------------------------
1 | const SitemapRotator = require('../SitemapRotator');
2 | 
3 | describe('#SitemapRotator', () => {
4 |   const rotator = SitemapRotator(2);
5 |   rotator.addURL('http://atest.com');
6 | 
7 |   afterAll(() => {
8 |     rotator.finish();
9 |   });
10 | 
11 |   test('should be a function', () => {
12 |     expect(SitemapRotator).toBeInstanceOf(Function);
13 |   });
14 | 
15 |   describe('#addURL', () => {
16 |     test('should have addURL method', () => {
17 |       expect(rotator).toHaveProperty('addURL');
18 |     });
19 |   });
20 | 
21 |   describe('#getPaths', () => {
22 |     test('should have getPaths method', () => {
23 |       expect(rotator).toHaveProperty('getPaths');
24 |     });
25 | 
26 |     test('should return array of paths', () => {
27 |       const paths = rotator.getPaths();
28 |       const expected = [expect.stringMatching(/.+/)];
29 |       expect(paths).toEqual(expect.arrayContaining(expected));
30 |     });
31 | 
32 |     test('should rotate sitemaps when max entries is reached', () => {
33 |       rotator.addURL('http://atest.com/a');
34 |       rotator.addURL('http://atest.com/b');
35 | 
36 |       expect(rotator.getPaths()).toHaveLength(2);
37 |     });
38 |   });
39 | 
40 |   describe('#finish', () => {
41 |     test('should have finish method', () => {
42 |       expect(rotator).toHaveProperty('finish');
43 |     });
44 |   });
45 | });
46 | 
--------------------------------------------------------------------------------
/src/helpers/__tests__/escapeUnsafe.js:
--------------------------------------------------------------------------------
1 | const escapeUnsafe = require('../escapeUnsafe');
2 | 
3 | describe('#escapeUnsafe', () => {
4 |   test('should be a function', () => {
5 |     expect(escapeUnsafe).toBeInstanceOf(Function);
6 |   });
7 | 
8 |   test('should escape < characters', () => {
9 |     const url = 'http://test.com/<>&\'"<>&\'"';
10 |     const escapedUrl = escapeUnsafe(url);
11 | 
12 |     expect(url).toMatch(/</);
13 |     expect(escapedUrl).not.toMatch(/</);
14 |   });
15 | 
16 |   test('should escape > characters', () => {
17 |     const url = 'http://test.com/<>&\'"<>&\'"';
18 |     const escapedUrl = escapeUnsafe(url);
19 | 
20 |     expect(url).toMatch(/>/);
21 |     expect(escapedUrl).not.toMatch(/>/);
22 |   });
23 | 
24 |   test('should escape & characters', () => {
25 |     const url = 'http://test.com/<>&\'"<>&\'"';
26 |     const escapedUrl = escapeUnsafe(url);
27 | 
28 |     expect(url).toMatch(/&/);
29 |     // Regex with negative lookahead, matches non escaping &'s
30 |     expect(escapedUrl).not.toMatch(/&(?!(?:apos|quot|[gl]t|amp);|#)/);
31 |   });
32 | 
33 |   test("should escape ' characters", () => {
34 |     const url = 'http://test.com/<>&\'"<>&\'"';
35 |     const escapedUrl = escapeUnsafe(url);
36 | 
37 |     expect(url).toMatch(/'/);
38 |     expect(escapedUrl).not.toMatch(/'/);
39 |   });
40 | 
41 |   test('should escape " characters', () => {
42 |     const url = 'http://test.com/<>&\'"<>&\'"';
43 |     const escapedUrl = escapeUnsafe(url);
44 | 
45 |     expect(url).toMatch(/"/);
46 |     expect(escapedUrl).not.toMatch(/"/);
47 |   });
48 | });
49 | 
--------------------------------------------------------------------------------
/src/discoverResources.js:
--------------------------------------------------------------------------------
1 | const url = require('url');
2 | const cheerio = require('cheerio');
3 | 
4 | module.exports = (buffer, queueItem) => {
5 |   const $ = cheerio.load(buffer.toString('utf8'));
6 | 
7 |   const metaRobots = $('meta[name="robots"]');
8 | 
9 |   if (metaRobots.length && /nofollow/i.test(metaRobots.attr('content'))) {
10 |     return [];
11 |   }
12 | 
13 |   const links = $('a[href]').map(function iteratee() {
14 |     let href = $(this).attr('href');
15 | 
16 |     // exclude "mailto:" etc
17 |     if (/^[a-z]+:(?!\/\/)/i.test(href)) {
18 |       return null;
19 |     }
20 | 
21 |     // exclude rel="nofollow" links
22 |     const rel = $(this).attr('rel');
23 |     if (/nofollow/i.test(rel)) {
24 |       return null;
25 |     }
26 | 
27 |     // remove anchors
28 |     href = href.replace(/(#.*)$/, '');
29 | 
30 |     // remove basic authentication
31 |     href = href.replace(/^\/?([^/]*@)/, '');
32 | 
33 |     // handle "//"
34 |     if (/^\/\//.test(href)) {
35 |       return `${queueItem.protocol}:${href}`;
36 |     }
37 | 
38 |     // check if link is relative
39 |     // (does not start with "http(s)" or "//")
40 |     if (!/^https?:\/\//.test(href)) {
41 |       const base = $('base').first();
42 |       if (base.length) {
43 |         // base tag is set, prepend it
44 |         if (base.attr('href') !== undefined) {
45 |           // base tags sometimes don't define href; they may only set target="_top" or target="_blank"
46 |           href = url.resolve(base.attr('href'), href);
47 |         }
48 |       }
49 | 
50 |       // handle links such as "./foo", "../foo", "/foo"
51 |       if (/^\.\.?\/.*/.test(href) || /^\/[^/].*/.test(href)) {
52 |         href = url.resolve(queueItem.url, href);
53 |       }
54 |     }
55 | 
56 |     return href;
57 |   });
58 | 
59 |   return links.get();
60 | };
61 | 
--------------------------------------------------------------------------------
/src/SitemapRotator.js:
--------------------------------------------------------------------------------
1 | const SitemapStream = require('./SitemapStream');
2 | const getCurrentDateTime = require('./helpers/getCurrentDateTime');
3 | 
4 | module.exports = function SitemapRotator(
5 |   maxEntries,
6 |   lastModEnabled,
7 |   changeFreq,
8 |   priorityMap
9 | ) {
10 |   const sitemaps = [];
11 |   let count = 0;
12 |   let current = null;
13 | 
14 |   // return temp sitemap paths
15 |   const getPaths = () =>
16 |     sitemaps.reduce((arr, map) => {
17 |       arr.push(map.getPath());
18 |       return arr;
19 |     }, []);
20 | 
21 |   // adds url to stream
22 |   const addURL = (url, depth, lastMod = getCurrentDateTime()) => {
23 |     const currentDateTime = lastModEnabled ? lastMod : null;
24 | 
25 |     // exclude existing sitemap.xml
26 |     if (/sitemap\.xml$/.test(url)) {
27 |       return;
28 |     }
29 | 
30 |     // create stream if none exists
31 |     if (current === null) {
32 |       current = SitemapStream();
33 |       sitemaps.push(current);
34 |     }
35 | 
36 |     // rotate stream
37 |     if (count === maxEntries) {
38 |       current.end();
39 |       current = SitemapStream();
40 |       sitemaps.push(current);
41 |       count = 0;
42 |     }
43 | 
44 |     let priority = '';
45 | 
46 |     // if priorityMap exists, set priority based on depth
47 |     // if depth is greater than map length, use the last value in the priorityMap
48 |     if (priorityMap && priorityMap.length > 0) {
49 |       priority = priorityMap[depth - 1]
50 |         ? priorityMap[depth - 1]
51 |         : priorityMap[priorityMap.length - 1];
52 |     }
53 | 
54 |     current.write(url, currentDateTime, changeFreq, priority);
55 | 
56 |     count += 1;
57 |   };
58 | 
59 |   // close stream
60 |   const finish = () => {
61 |     if (current) {
62 |       current.end();
63 |     }
64 |   };
65 | 
66 |   return {
67 |     getPaths,
68 |     addURL,
69 |     finish
70 |   };
71 | };
72 | 
--------------------------------------------------------------------------------
/src/createCrawler.js:
--------------------------------------------------------------------------------
1 | const Crawler = require('simplecrawler');
2 | const has = require('lodash/has');
3 | 
4 | const discoverResources = require('./discoverResources');
5 | const stringifyURL = require('./helpers/stringifyURL');
6 | 
7 | module.exports = (uri, options = {}) => {
8 |   // excluded filetypes
9 |   const exclude = [
10 |     'gif',
11 |     'jpg',
12 |     'jpeg',
13 |     'png',
14 |     'ico',
15 |     'bmp',
16 |     'ogg',
17 |     'webp',
18 |     'mp4',
19 |     'webm',
20 |     'mp3',
21 |     'ttf',
22 |     'woff',
23 |     'json',
24 |     'rss',
25 |     'atom',
26 |     'gz',
27 |     'zip',
28 |     'rar',
29 |     '7z',
30 |     'css',
31 |     'js',
32 |     'gzip',
33 |     'exe',
34 |     'svg'
35 |   ].join('|');
36 | 
37 |   const extRegex = new RegExp(`\\.(${exclude})$`, 'i');
38 | 
39 |   const crawler = new Crawler(uri.href);
40 | 
41 |   Object.keys(options).forEach(o => {
42 |     if (has(crawler, o)) {
43 |       crawler[o] = options[o];
44 |     } else if (o === 'crawlerMaxDepth') {
45 |       // eslint-disable-next-line
46 |       console.warn(
47 |         'Option "crawlerMaxDepth" is deprecated. Please use "maxDepth".'
48 |       );
49 |       if (!options.maxDepth) {
50 |         crawler.maxDepth = options.crawlerMaxDepth;
51 |       }
52 |     }
53 |   });
54 | 
55 |   // use custom discoverResources function
56 |   crawler.discoverResources = discoverResources;
57 | 
58 |   // set crawler options
59 |   // see https://github.com/cgiffard/node-simplecrawler#configuration
60 |   crawler.initialPath = uri.pathname !== '' ? uri.pathname : '/';
61 |   crawler.initialProtocol = uri.protocol.replace(':', '');
62 | 
63 |   // restrict to subpages if path is provided
64 |   crawler.addFetchCondition(parsedUrl => {
65 |     const initialURLRegex = new RegExp(`${uri.pathname}.*`);
66 |     return stringifyURL(parsedUrl).match(initialURLRegex);
67 |   });
68 | 
69 |   // file type exclusion
70 |   crawler.addFetchCondition(parsedUrl => !parsedUrl.path.match(extRegex));
71 | 
72 |   return crawler;
73 | };
74 | 
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "sitemap-generator",
3 |   "version": "0.0.0-semantically-released",
4 |   "description": "Easily create XML sitemaps for your website.",
5 |   "homepage": "https://github.com/lgraubner/sitemap-generator",
6 |   "author": "Lars Graubner (https://larsgraubner.com)",
7 |   "keywords": [
8 |     "sitemap",
9 |     "xml",
10 |     "sitemap.xml",
11 |     "generator",
12 |     "crawler",
13 |     "seo",
14 |     "google",
15 |     "ecosystem:node"
16 |   ],
17 |   "main": "src/index.js",
18 |   "repository": {
19 |     "type": "git",
20 |     "url": "https://github.com/lgraubner/sitemap-generator.git"
21 |   },
22 |   "bugs": {
23 |     "url": "https://github.com/lgraubner/sitemap-generator/issues"
24 |   },
25 |   "dependencies": {
26 |     "async": "2.6.1",
27 |     "cheerio": "1.0.0-rc.2",
28 |     "cp-file": "6.0.0",
29 |     "crypto-random-string": "1.0.0",
30 |     "date-fns": "1.29.0",
31 |     "lodash": "4.17.20",
32 |     "mitt": "1.1.3",
33 |     "normalize-url": "3.3.0",
34 |     "simplecrawler": "1.1.9",
35 |     "url-parse": "1.4.7"
36 |   },
37 |   "engines": {
38 |     "node": ">=10"
39 |   },
40 |   "license": "MIT",
41 |   "files": [
42 |     "src",
43 |     "!**/__tests__"
44 |   ],
45 |   "devDependencies": {
46 |     "eslint": "5.8.0",
47 |     "husky": "1.1.2",
48 |     "jest": "24.8.0",
49 |     "lint-staged": "7.3.0",
50 |     "prettier": "1.14.3"
51 |   },
52 |   "scripts": {
53 |     "lint": "eslint src",
54 |     "test": "jest",
55 |     "test:watch": "npm test -- --watch",
56 |     "flow": "flow"
57 |   },
58 |   "lint-staged": {
59 |     "*.js": [
60 |       "eslint --fix",
61 |       "prettier --write",
62 |       "git add"
63 |     ]
64 |   },
65 |   "prettier": {
66 |     "singleQuote": true
67 |   },
68 |   "eslintConfig": {
69 |     "parserOptions": {
70 |       "ecmaVersion": 6
71 |     },
72 |     "extends": "eslint:recommended",
73 |     "env": {
74 |       "node": true,
75 |       "jest": true
76 |     }
77 |   },
78 |   "husky": {
79 |     "hooks": {
80 |       "pre-commit": "lint-staged"
81 |     }
82 |   },
83 |   "release": {
84 |     "tagFormat": "${version}"
85 |   }
86 | }
87 | 
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | const fs = require('fs');
2 | const http = require('http');
3 | const path = require('path');
4 | const parseURL = require('url-parse');
5 | const eachSeries = require('async/eachSeries');
6 | const cpFile = require('cp-file');
7 | const normalizeUrl = require('normalize-url');
8 | const mitt = require('mitt');
9 | const format = require('date-fns/format');
10 | 
11 | const createCrawler = require('./createCrawler');
12 | const SitemapRotator = require('./SitemapRotator');
13 | const createSitemapIndex = require('./createSitemapIndex');
14 | const extendFilename = require('./helpers/extendFilename');
15 | const validChangeFreq = require('./helpers/validChangeFreq');
16 | 
17 | module.exports = function SitemapGenerator(uri, opts) {
18 |   const defaultOpts = {
19 |     stripQuerystring: true,
20 |     maxEntriesPerFile: 50000,
21 |     maxDepth: 0,
22 |     filepath: path.join(process.cwd(), 'sitemap.xml'),
23 |     userAgent: 'Node/SitemapGenerator',
24 |     respectRobotsTxt: true,
25 |     ignoreInvalidSSL: true,
26 |     timeout: 30000,
27 |     decodeResponses: true,
28 |     lastMod: false,
29 |     changeFreq: '',
30 |     priorityMap: [],
31 |     ignoreAMP: true,
32 |     ignore: null
33 |   };
34 | 
35 |   if (!uri) {
36 |     throw new Error('Requires a valid URL.');
37 |   }
38 | 
39 |   const options = Object.assign({}, defaultOpts, opts);
40 | 
41 |   // if changeFreq option was passed, check to see if the value is valid
42 |   if (opts && opts.changeFreq) {
43 |     options.changeFreq = validChangeFreq(opts.changeFreq);
44 |   }
45 | 
46 |   const emitter = mitt();
47 | 
48 |   const parsedUrl = parseURL(
49 |     normalizeUrl(uri, {
50 |       stripWWW: false,
51 |       removeTrailingSlash: false
52 |     })
53 |   );
54 | 
55 |   // only resolve if sitemap path is truthy (a string preferably)
56 |   const sitemapPath = options.filepath && path.resolve(options.filepath);
57 | 
58 |   // we don't care about invalid certs
59 |   process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
60 | 
61 |   const crawler = createCrawler(parsedUrl, options);
62 | 
63 |   // create sitemap stream
64 |   const sitemap = SitemapRotator(
65 |     options.maxEntriesPerFile,
66 |     options.lastMod,
67 |     options.changeFreq,
68 |     options.priorityMap
69 |   );
70 | 
71 |   const emitError = (code, url) => {
72 |     emitter.emit('error', {
73 |       code,
74 |       message: http.STATUS_CODES[code],
75 |       url
76 |     });
77 |   };
78 | 
79 |   crawler.on('fetch404', ({ url }) => emitError(404, url));
80 |   crawler.on('fetchtimeout', ({ url }) => emitError(408, url));
81 |   crawler.on('fetch410', ({ url }) => emitError(410, url));
82 |   crawler.on('fetcherror', (queueItem, response) =>
83 |     emitError(response.statusCode, queueItem.url)
84 |   );
85 | 
86 |   crawler.on('fetchclienterror', (queueError, errorData) => {
87 |     if (errorData.code === 'ENOTFOUND') {
88 |       emitError(404, `Site ${JSON.stringify(queueError)} could not be found. REQUEST: ${JSON.stringify(errorData)}`);
89 |     } else {
90 |       emitError(400, errorData.message);
91 |     }
92 |   });
93 | 
94 |   crawler.on('fetchdisallowed', ({ url }) => emitter.emit('ignore', url));
95 | 
96 |   // fetch complete event
97 |   crawler.on('fetchcomplete', (queueItem, page) => {
98 |     const { url, depth } = queueItem;
99 | 
100 |     if (
101 |       (opts.ignore && opts.ignore(url)) ||
102 |       /(<meta(?=[^>]+noindex).*?>)/.test(page) || // check if robots noindex is present
103 |       (options.ignoreAMP && /<html[^>]+(amp|⚡)[^>]*>/.test(page)) // check if it's an amp page
104 |     ) {
105 |       emitter.emit('ignore', url);
106 |     } else {
107 |       emitter.emit('add', url);
108 | 
109 |       if (sitemapPath !== null) {
110 |         // eslint-disable-next-line
111 |         const lastMod = queueItem.stateData.headers['last-modified'];
112 |         sitemap.addURL(url, depth, lastMod && format(lastMod, 'YYYY-MM-DD'));
113 |       }
114 |     }
115 |   });
116 | 
117 |   crawler.on('complete', () => {
118 |     sitemap.finish();
119 | 
120 |     const sitemaps = sitemap.getPaths();
121 | 
122 |     const cb = () => emitter.emit('done');
123 | 
124 |     if (sitemapPath !== null) {
125 |       // move files
126 |       if (sitemaps.length > 1) {
127 |         // multiple sitemaps
128 |         let count = 1;
129 |         eachSeries(
130 |           sitemaps,
131 |           (tmpPath, done) => {
132 |             const newPath = extendFilename(sitemapPath, `_part${count}`);
133 | 
134 |             // copy and remove tmp file
135 |             cpFile(tmpPath, newPath).then(() => {
136 |               fs.unlink(tmpPath, () => {
137 |                 done();
138 |               });
139 |             });
140 | 
141 |             count += 1;
142 |           },
143 |           () => {
144 |             const filename = path.basename(sitemapPath);
145 |             fs.writeFile(
146 |               sitemapPath,
147 |               createSitemapIndex(
148 |                 parsedUrl.toString(),
149 |                 filename,
150 |                 sitemaps.length
151 |               ),
152 |               cb
153 |             );
154 |           }
155 |         );
156 |       } else if (sitemaps.length) {
157 |         cpFile(sitemaps[0], sitemapPath).then(() => {
158 |           fs.unlink(sitemaps[0], cb);
159 |         });
160 |       } else {
161 |         cb();
162 |       }
163 |     } else {
164 |       cb();
165 |     }
166 |   });
167 | 
168 |   return {
169 |     start: () => crawler.start(),
170 |     stop: () => crawler.stop(),
171 |     getCrawler: () => crawler,
172 |     getSitemap: () => sitemap,
173 |     queueURL: url => {
174 |       crawler.queueURL(url, undefined, false);
175 |     },
176 |     on: emitter.on,
177 |     off: emitter.off
178 |   };
179 | };
180 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sitemap Generator
2 | 
3 | [![Travis](https://img.shields.io/travis/lgraubner/sitemap-generator.svg)](https://travis-ci.org/lgraubner/sitemap-generator) [![David](https://img.shields.io/david/lgraubner/sitemap-generator.svg)](https://david-dm.org/lgraubner/sitemap-generator) [![npm](https://img.shields.io/npm/v/sitemap-generator.svg)](https://www.npmjs.com/package/sitemap-generator)
4 | 
5 | > Easily create XML sitemaps for your website.
6 | 
7 | Generates a sitemap by crawling your site. Uses streams to efficiently write the sitemap to your drive and runs asynchronously to avoid blocking the thread. Is capable of creating multiple sitemaps if a threshold is reached. Respects robots.txt and meta tags.
8 | 
9 | This package is not meant to be used in a production code base directly, but rather on the deployed product. This means you develop your app/website as usual, deploy it and create the sitemap with this tool _afterwards_. The simplest way is to use the [CLI](https://github.com/lgraubner/sitemap-generator-cli) (this is a different package!) to create the sitemap on the command line. If you have a more advanced use case or want to adjust the crawler behavior you should use the programmatic version (this package). Create the crawler as needed and simply run it via `node mycrawler.js`.
10 | 
11 | ## Table of contents
12 | 
13 | - [Install](#install)
14 | - [Usage](#usage)
15 | - [API](#api)
16 | - [Options](#options)
17 | - [Events](#events)
18 | - [FAQ](#faq)
19 | - [License](#license)
20 | 
21 | ## Install
22 | 
23 | This module is available on [npm](https://www.npmjs.com/).
24 | 
25 | ```
26 | $ npm install -S sitemap-generator
27 | ```
28 | 
29 | This module runs only with Node.js and is not meant to be used in the browser.
30 | 
31 | ## Usage
32 | 
33 | ```JavaScript
34 | const SitemapGenerator = require('sitemap-generator');
35 | 
36 | // create generator
37 | const generator = SitemapGenerator('http://example.com', {
38 |   stripQuerystring: false
39 | });
40 | 
41 | // register event listeners
42 | generator.on('done', () => {
43 |   // sitemaps created
44 | });
45 | 
46 | // start the crawler
47 | generator.start();
48 | ```
49 | 
50 | The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also, the crawler will not fetch URLs from a page if the robots meta tag with the value `nofollow` is present, and will ignore them completely if the `noindex` rule is present. The crawler is able to apply the `base` value to found links.
51 | 
52 | ## API
53 | 
54 | The generator offers straightforward methods to start and stop it. You can also add URLs manually.
55 | 
56 | ### start()
57 | 
58 | Starts the crawler asynchronously and writes the sitemap to disk.
59 | 
60 | ### stop()
61 | 
62 | Stops the running crawler and halts the sitemap generation.
63 | 
64 | ### getCrawler()
65 | 
66 | Returns the crawler instance. For more information about the crawler check the [simplecrawler docs](https://github.com/simplecrawler/simplecrawler#readme).
67 | 
68 | This can be useful to ignore certain sites and not add them to the sitemap.
69 | 
70 | ```JavaScript
71 | const crawler = generator.getCrawler();
72 | crawler.addFetchCondition((queueItem, referrerQueueItem, callback) => {
73 |   callback(null, !queueItem.path.match(/myregex/));
74 | });
75 | ```
76 | 
77 | ### getSitemap()
78 | 
79 | Returns the sitemap instance (`SitemapRotator`).
80 | 
81 | This can be useful to add static URLs to the sitemap:
82 | 
83 | ```JavaScript
84 | const crawler = generator.getCrawler()
85 | const sitemap = generator.getSitemap()
86 | 
87 | // Add static URL on crawl init.
88 | crawler.on('crawlstart', () => {
89 |   sitemap.addURL('/my/static/url')
90 | })
91 | ```
92 | 
93 | ### queueURL(url)
94 | 
95 | Add a URL to the crawler's queue. Useful to help the crawler fetch pages it can't find itself, as shown below.
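For example, a page that is not linked from anywhere can be queued by hand before the crawl starts (a minimal sketch; the URL is hypothetical):

```JavaScript
// make sure the orphan page ends up in the sitemap
generator.queueURL('http://example.com/orphan/page.html');
generator.start();
```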
96 | 
97 | ## Options
98 | 
99 | There are a couple of options to adjust the sitemap output. In addition to the options below, the options of the underlying crawler can be changed. For a complete list please check its [official documentation](https://github.com/simplecrawler/simplecrawler#configuration).
100 | 
101 | ```JavaScript
102 | var generator = SitemapGenerator('http://example.com', {
103 |   maxDepth: 0,
104 |   filepath: './sitemap.xml',
105 |   maxEntriesPerFile: 50000,
106 |   stripQuerystring: true
107 | });
108 | ```
109 | 
110 | ### changeFreq
111 | 
112 | Type: `string`
113 | Default: `undefined`
114 | 
115 | If defined, adds a `<changefreq>` line to each URL in the sitemap. Possible values are `always`, `hourly`, `daily`, `weekly`, `monthly`, `yearly`, `never`. All other values are ignored.
116 | 
117 | ### filepath
118 | 
119 | Type: `string`
120 | Default: `./sitemap.xml`
121 | 
122 | Filepath for the new sitemap. If multiple sitemaps are created, "part\_$index" is appended to each filename. If you don't want to write a file at all you can pass `null` as filepath.
123 | 
124 | ### httpAgent
125 | 
126 | Type: `HTTPAgent`
127 | Default: `http.globalAgent`
128 | 
129 | Controls what HTTP agent to use. This is useful if you want to configure an HTTP connection through a HTTP/HTTPS proxy (see [http-proxy-agent](https://www.npmjs.com/package/http-proxy-agent)).
130 | 
131 | ### httpsAgent
132 | 
133 | Type: `HTTPAgent`
134 | Default: `https.globalAgent`
135 | 
136 | Controls what HTTPS agent to use. This is useful if you want to configure an HTTPS connection through a HTTP/HTTPS proxy (see [https-proxy-agent](https://www.npmjs.com/package/https-proxy-agent)).
137 | 
138 | Example:
139 | 
140 | ```JavaScript
141 | // don't forget to:
142 | // npm i http-proxy-agent https-proxy-agent
143 | const HttpProxyAgent = require("http-proxy-agent");
144 | const HttpsProxyAgent = require("https-proxy-agent");
145 | const proxyAddress = 'http://localhost:1234';
146 | const httpProxyAgent = new HttpProxyAgent(proxyAddress);
147 | const httpsProxyAgent = new HttpsProxyAgent(proxyAddress);
148 | options.httpAgent = httpProxyAgent;
149 | options.httpsAgent = httpsProxyAgent;
150 | ```
151 | 
152 | ### ignore(url)
153 | 
154 | Apply a test condition to a URL before it's added to the sitemap.
155 | 
156 | Type: `function`
157 | Default: `null`
158 | 
159 | Example:
160 | 
161 | ```JavaScript
162 | const generator = SitemapGenerator(url, {
163 |   ignore: url => {
164 |     // Prevent URLs from being added that contain `<pattern>`.
165 |     return /<pattern>/g.test(url)
166 |   }
167 | })
168 | ```
169 | 
170 | ### ignoreAMP
171 | 
172 | Type: `boolean`
173 | Default: `true`
174 | 
175 | Indicates whether [Google AMP pages](https://www.ampproject.org/) should be ignored and not be added to the sitemap.
176 | 
177 | ### lastMod
178 | 
179 | Type: `boolean`
180 | Default: `false`
181 | 
182 | Whether to add a `<lastmod>` line to each URL in the sitemap. If present, the response's `Last-Modified` header will be used. Otherwise today's date is added.
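With `lastMod` and `changeFreq` enabled, each entry ends up looking roughly like this (a sketch; the date shown is illustrative):

```JavaScript
const generator = SitemapGenerator('http://example.com', {
  lastMod: true,
  changeFreq: 'weekly'
});

// resulting <url> entry (illustrative):
// <url>
//   <loc>http://example.com/page.html</loc>
//   <lastmod>2020-01-01</lastmod>
//   <changefreq>weekly</changefreq>
// </url>
```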
183 | 
184 | ### maxEntriesPerFile
185 | 
186 | Type: `number`
187 | Default: `50000`
188 | 
189 | Google limits the maximum number of URLs in one sitemap to 50000. If this limit is reached the sitemap-generator creates another sitemap. A sitemap index file will be created as well.
190 | 
191 | ### priorityMap
192 | 
193 | Type: `array`
194 | Default: `[]`
195 | 
196 | If provided, adds a `<priority>` line to each URL in the sitemap. Each value in the priorityMap array corresponds to the depth of the URL being added. For example, the priority value given to a URL equals `priorityMap[depth - 1]`. If a URL's depth is greater than the length of the priorityMap array, the last value in the array will be used. Valid values are between `1.0` and `0.0`.
197 | 
198 | Example:
199 | 
200 | ```javascript
201 | [1.0, 0.8, 0.6, 0.4, 0.2, 0]
202 | ```
203 | 
204 | ### userAgent
205 | 
206 | Type: `string`
207 | Default: `Node/SitemapGenerator`
208 | 
209 | Change the default crawler user agent.
210 | 
211 | ## Events
212 | 
213 | The Sitemap Generator emits several events which can be listened to.
214 | 
215 | ### `add`
216 | 
217 | Triggered when the crawler successfully added a resource to the sitemap. Passes the URL as argument.
218 | 
219 | ```JavaScript
220 | generator.on('add', (url) => {
221 |   // log url
222 | });
223 | ```
224 | 
225 | ### `done`
226 | 
227 | Triggered when the crawler finished and the sitemap is created.
228 | 
229 | ```JavaScript
230 | generator.on('done', () => {
231 |   // sitemaps created
232 | });
233 | ```
234 | 
235 | ### `error`
236 | 
237 | Thrown if there was an error while fetching a URL. Passes an object with the HTTP status code, a message and the URL as argument.
238 | 
239 | ```JavaScript
240 | generator.on('error', (error) => {
241 |   console.log(error);
242 |   // => { code: 404, message: 'Not found.', url: 'http://example.com/foo' }
243 | });
244 | ```
245 | 
246 | ### `ignore`
247 | 
248 | If a URL matches a disallow rule in the `robots.txt` file or the meta robots `noindex` is present, this event is triggered. The URL will not be added to the sitemap. Passes the ignored URL as argument.
249 | 
250 | ```JavaScript
251 | generator.on('ignore', (url) => {
252 |   // log ignored url
253 | });
254 | ```
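Putting the events together, a typical listener setup might look like this (a sketch using only the events documented above):

```JavaScript
generator.on('add', url => console.log(`added ${url}`));
generator.on('ignore', url => console.log(`ignored ${url}`));
generator.on('error', ({ code, url }) => console.error(`${code} on ${url}`));
generator.on('done', () => console.log('sitemap created'));

generator.start();
```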
255 | 
256 | ## FAQ
257 | 
258 | <details>
259 |   <summary>Does this work with React, Angular, ...?</summary>
260 |   This package doesn't care what frameworks and technologies you are using under the hood. The only requirement is that your URLs return valid HTML. Therefore SSR (server-side rendering) is required for single page apps, as no JavaScript is executed.
261 | </details>
262 | 
263 | <details>
264 |   <summary>Where to put this code?</summary>
265 |   This is basically up to you. You can execute this code manually and upload your sitemap by hand, or you can put it on your server and run it periodically to keep your sitemap up to date, as sketched below.
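  A minimal sketch of the periodic approach (the filename, output path and schedule are assumptions):

  ```JavaScript
  // mycrawler.js — recreate the sitemap from scratch on every run
  const SitemapGenerator = require('sitemap-generator');

  const generator = SitemapGenerator('http://example.com', {
    filepath: '/var/www/static/sitemap.xml'
  });

  generator.on('done', () => console.log('sitemap updated'));
  generator.start();

  // run e.g. nightly via cron: 0 3 * * * node /path/to/mycrawler.js
  ```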
266 | </details>
267 | 
268 | <details>
269 |   <summary>Should I use this package or the CLI?</summary>
270 |   The CLI should suffice for most common use cases. It has several options to tweak in case you want it to behave differently. If your use case is more advanced and you need fine control over what the crawler should fetch, you should use this package and the programmatic API.
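  For reference, the CLI is invoked roughly like this (a sketch; check the [CLI repository](https://github.com/lgraubner/sitemap-generator-cli) for the exact command and flags):

  ```
  $ npx sitemap-generator-cli http://example.com
  ```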
271 | </details>
272 | 
273 | ## License
274 | 
275 | [MIT](https://github.com/lgraubner/sitemap-generator/blob/master/LICENSE) © [Lars Graubner](https://larsgraubner.com)
276 | 
--------------------------------------------------------------------------------