├── .editorconfig ├── .eslintrc.js ├── .gitattributes ├── .gitignore ├── .remarkrc.js ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── __tests__ ├── __snapshots__ │ └── standalone.test.js.snap ├── fixtures │ └── robots-txt.config.js └── standalone.test.js ├── babel.config.js ├── husky.config.js ├── lint-staged.config.js ├── package-lock.json ├── package.json ├── readme.md └── src ├── cli.js ├── index.js └── standalone.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # For more information please visit http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [*.php] 13 | indent_size = 4 14 | 15 | [*.py] 16 | indent_size = 4 17 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | module.exports = { 4 | extends: [ 5 | "plugin:itgalaxy/script", 6 | "plugin:itgalaxy/esnext", 7 | "plugin:itgalaxy/node", 8 | ], 9 | overrides: [ 10 | // Source 11 | { 12 | extends: ["plugin:itgalaxy/module"], 13 | // Exclude nested tests 14 | excludedFiles: ["**/__tests__/**/*", "**/__mocks__/**/*", "**/*.md"], 15 | files: ["src/**/*"], 16 | rules: { 17 | // Allow to use ES module syntax 18 | // You should use babel if your node version is not supported ES syntax module, dynamic loading ES modules or other features 19 | "node/no-unsupported-features/es-syntax": [ 20 | "error", 21 | { ignores: ["modules", "dynamicImport"] }, 22 | ], 23 | }, 24 | }, 25 | 26 | // Jest 27 | { 28 | extends: ["plugin:itgalaxy/dirty", "plugin:itgalaxy/jest"], 29 | excludedFiles: ["**/*.md"], 30 | files: ["**/__tests__/**/*", "**/__mocks__/**/*"], 31 | rules: { 32 | // Test can be written with using ES module syntax or CommonJS module syntax 33 | 
"node/no-unsupported-features/es-syntax": [ 34 | "error", 35 | { ignores: ["modules", "dynamicImport"] }, 36 | ], 37 | 38 | // Allow to use `console` (example - `mocking`) 39 | "no-console": "off", 40 | }, 41 | }, 42 | 43 | // Markdown 44 | { 45 | extends: [ 46 | // Documentation files can contain ECMA and CommonJS modules 47 | "plugin:itgalaxy/dirty", 48 | "plugin:itgalaxy/markdown", 49 | ], 50 | files: ["**/*.md"], 51 | rules: { 52 | "no-unused-vars": "off", 53 | "no-console": "off", 54 | "import/no-unresolved": "off", 55 | "node/no-unpublished-require": "off", 56 | "node/no-unpublished-import": "off", 57 | // Documentation files can contain ES module syntax and CommonJS module syntax 58 | "node/no-unsupported-features/es-syntax": [ 59 | "error", 60 | { ignores: ["modules", "dynamicImport"] }, 61 | ], 62 | }, 63 | }, 64 | ], 65 | root: true, 66 | }; 67 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ## GITATTRIBUTES FOR WEB PROJECTS 2 | # 3 | # These settings are for any web project. 4 | # 5 | # Details per file setting: 6 | # text These files should be normalized (i.e. convert CRLF to LF). 7 | # binary These files are binary and should be left untouched. 8 | # 9 | # Note that binary is a macro for -text -diff. 10 | ###################################################################### 11 | 12 | ## AUTO-DETECT 13 | ## Handle line endings automatically for files detected as 14 | ## text and leave all files detected as binary untouched. 15 | ## This will handle all files NOT defined below. 
16 | * text=auto 17 | 18 | ## Source code 19 | *.bat text eol=crlf 20 | *.coffee text 21 | *.css text 22 | *.htm text 23 | *.html text 24 | *.inc text 25 | *.ini text 26 | *.js text 27 | *.json text 28 | *.jsx text 29 | *.less text 30 | *.od text 31 | *.onlydata text 32 | *.php text 33 | *.pl text 34 | *.py text 35 | *.rb text 36 | *.sass text 37 | *.scm text 38 | *.scss text 39 | *.sh text eol=lf 40 | *.sql text 41 | *.styl text 42 | *.tag text 43 | *.ts text 44 | *.tsx text 45 | *.xml text 46 | *.xhtml text 47 | 48 | ## Docker 49 | *.dockerignore text 50 | Dockerfile text 51 | 52 | ## Documentation 53 | *.markdown text 54 | *.md text 55 | *.mdwn text 56 | *.mdown text 57 | *.mkd text 58 | *.mkdn text 59 | *.mdtxt text 60 | *.mdtext text 61 | *.txt text 62 | AUTHORS text 63 | CHANGELOG text 64 | CHANGES text 65 | CONTRIBUTING text 66 | COPYING text 67 | copyright text 68 | *COPYRIGHT* text 69 | INSTALL text 70 | license text 71 | LICENSE text 72 | NEWS text 73 | readme text 74 | *README* text 75 | TODO text 76 | 77 | ## Templates 78 | *.dot text 79 | *.ejs text 80 | *.haml text 81 | *.handlebars text 82 | *.hbs text 83 | *.hbt text 84 | *.jade text 85 | *.latte text 86 | *.mustache text 87 | *.njk text 88 | *.phtml text 89 | *.tmpl text 90 | *.tpl text 91 | *.twig text 92 | 93 | ## Linters 94 | .csslintrc text 95 | .eslintrc text 96 | .htmlhintrc text 97 | .jscsrc text 98 | .jshintrc text 99 | .jshintignore text 100 | .stylelintrc text 101 | 102 | ## Configs 103 | *.bowerrc text 104 | *.cnf text 105 | *.conf text 106 | *.config text 107 | .browserslistrc text 108 | .editorconfig text 109 | .gitattributes text 110 | .gitconfig text 111 | .gitignore text 112 | .htaccess text 113 | *.npmignore text 114 | *.yaml text 115 | *.yml text 116 | browserslist text 117 | Makefile text 118 | makefile text 119 | 120 | ## Heroku 121 | Procfile text 122 | .slugignore text 123 | 124 | ## Graphics 125 | *.ai binary 126 | *.bmp binary 127 | *.eps binary 128 | *.gif binary 129 | 
*.ico binary 130 | *.jng binary 131 | *.jp2 binary 132 | *.jpg binary 133 | *.jpeg binary 134 | *.jpx binary 135 | *.jxr binary 136 | *.pdf binary 137 | *.png binary 138 | *.psb binary 139 | *.psd binary 140 | *.svg text 141 | *.svgz binary 142 | *.tif binary 143 | *.tiff binary 144 | *.wbmp binary 145 | *.webp binary 146 | 147 | ## Audio 148 | *.kar binary 149 | *.m4a binary 150 | *.mid binary 151 | *.midi binary 152 | *.mp3 binary 153 | *.ogg binary 154 | *.ra binary 155 | 156 | ## Video 157 | *.3gpp binary 158 | *.3gp binary 159 | *.as binary 160 | *.asf binary 161 | *.asx binary 162 | *.fla binary 163 | *.flv binary 164 | *.m4v binary 165 | *.mng binary 166 | *.mov binary 167 | *.mp4 binary 168 | *.mpeg binary 169 | *.mpg binary 170 | *.ogv binary 171 | *.swc binary 172 | *.swf binary 173 | *.webm binary 174 | 175 | ## Archives 176 | *.7z binary 177 | *.gz binary 178 | *.jar binary 179 | *.rar binary 180 | *.tar binary 181 | *.zip binary 182 | 183 | ## Fonts 184 | *.ttf binary 185 | *.eot binary 186 | *.otf binary 187 | *.woff binary 188 | *.woff2 binary 189 | 190 | ## Executables 191 | *.exe binary 192 | *.pyc binary 193 | 194 | ## Lock files 195 | package-lock.json -diff 196 | yarn.lock -diff 197 | composer.lock -diff 198 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Include your project-specific ignores in this file 2 | # Read about how to use .gitignore: https://help.github.com/articles/ignoring-files 3 | # Useful .gitignore templates: https://github.com/github/gitignore 4 | 5 | # Git files 6 | *.diff 7 | *.orig 8 | *.rej 9 | 10 | # Numerous always-ignore extensions 11 | pids 12 | *.pid 13 | *.seed 14 | *.lock 15 | *.err 16 | *.tmp 17 | *.log* 18 | 19 | # Vim 20 | [._]*.s[a-w][a-z] 21 | [._]s[a-w][a-z] 22 | *.un~ 23 | Session.vim 24 | .netrwhist 25 | *~ 26 | 27 | # Text Editors 28 | .swo 29 | *.swp 30 | *.vi 31 | 32 | # 
Numerous always-ignore directories 33 | logs 34 | tmp 35 | 36 | # Windows files and directories 37 | Thumbs.db 38 | ehthumbs.db 39 | ehthumbs_vista.db 40 | Image.db 41 | Video.db 42 | TVThumb.db 43 | musicThumbs.db 44 | thumbcache_*.db 45 | 46 | # Mac files and directories 47 | .DS_Store 48 | .AppleDouble 49 | .LSOverride 50 | .Spotlight-V100 51 | .Trashes 52 | .AppleDB 53 | .AppleDesktop 54 | Network Trash Folder 55 | Temporary Items 56 | .apdisk 57 | 58 | # Thumbnails 59 | ._* 60 | 61 | # IntelliJ IDEA and other products 62 | *.iml 63 | .idea 64 | release 65 | 66 | # VSCode metadata 67 | .vscode 68 | 69 | # Sublime 70 | *.sublime-project 71 | *.sublime-workspace 72 | .sublimelinterrc 73 | 74 | # Eclipse 75 | .project 76 | .classpath 77 | .settings 78 | 79 | # gitkeep 80 | !.gitkeep 81 | 82 | # Directory for instrumented libs generated by `jscoverage/JSCover` 83 | lib-cov 84 | 85 | # Coverage directory used by tools like `istanbul`, `phpunit/php-code-coverage` and etc. 86 | coverage 87 | 88 | # nyc test coverage 89 | .nyc_output 90 | 91 | # node-waf configuration 92 | .lock-wscript 93 | 94 | # Compiled binary addons (http://nodejs.org/api/addons.html) 95 | build/Release 96 | 97 | # Dependency directories 98 | bower_components 99 | .bower-cache 100 | .bower-registry 101 | .bower-tmp 102 | node_modules 103 | jspm_packages 104 | 105 | # Optional npm cache directory 106 | .npm 107 | 108 | # Optional REPL history 109 | .node_repl_history 110 | 111 | # Ignore minified files 112 | *.min.* 113 | 114 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 115 | .grunt 116 | 117 | # Caches 118 | .cache 119 | .cache-loader 120 | .eslintcache 121 | .eclintercache 122 | .stylelintcache 123 | .sass-cache 124 | .cache-wordpress 125 | .phpcscache 126 | 127 | # npm 128 | npm-shrinkwrap.json 129 | !package-lock.json 130 | 131 | # Yarn 132 | !yarn.lock 133 | 134 | # Dotenv 135 | .env 136 | .env.* 137 | !.env*.example 138 | 139 | # Webpack stats 140 | 
stats*.json 141 | 142 | # Composer 143 | composer.phar 144 | vendor 145 | !composer.lock 146 | 147 | # Library 148 | dist 149 | -------------------------------------------------------------------------------- /.remarkrc.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | module.exports = { 4 | plugins: ["remark-preset-lint-itgalaxy"], 5 | }; 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | git: 4 | depth: 10 5 | 6 | branches: 7 | only: 8 | - master 9 | - next 10 | 11 | language: node_js 12 | 13 | cache: 14 | directories: 15 | - $HOME/.npm 16 | - node_modules 17 | 18 | matrix: 19 | include: 20 | - node_js: "10" 21 | script: npm run pretest 22 | env: CI=pretest 23 | - node_js: "10" 24 | script: npm run test:only 25 | env: CI=tests 10 26 | - node_js: "12" 27 | script: npm run test:only 28 | env: CI=tests 12 29 | - node_js: "14" 30 | script: npm run test:only 31 | env: CI=tests 14 32 | 33 | before_install: 34 | - npm install -g npm@latest 35 | 36 | install: 37 | - npm ci 38 | 39 | before_script: 40 | - node --version 41 | - npm --version 42 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 
4 | 5 | ## [8.0.3](https://github.com/itgalaxy/generate-robotstxt/compare/v8.0.2...v8.0.3) (2020-06-12) 6 | 7 | ### Chore 8 | 9 | * update deps 10 | 11 | ## [8.0.2](https://github.com/itgalaxy/generate-robotstxt/compare/v8.0.1...v8.0.2) (2020-03-24) 12 | 13 | ### Chore 14 | 15 | * update deps 16 | 17 | ## [8.0.1](https://github.com/itgalaxy/generate-robotstxt/compare/v8.0.0...v8.0.1) (2020-02-19) 18 | 19 | ### Chore 20 | 21 | * update deps 22 | 23 | ## [8.0.0](https://github.com/itgalaxy/generate-robotstxt/compare/v7.1.0...v8.0.0) (2019-07-03) 24 | 25 | ### Breaking change 26 | 27 | * minimum required Node.js version is `10.13.0`. 28 | 29 | ## [7.1.0](https://github.com/itgalaxy/generate-robotstxt/compare/v6.0.1...v7.1.0) (2019-07-03) 30 | 31 | 32 | ### Features 33 | 34 | * cjs export ([#123](https://github.com/itgalaxy/generate-robotstxt/issues/123)) ([9c0507b](https://github.com/itgalaxy/generate-robotstxt/commit/9c0507b)) 35 | 36 | 37 | 38 | ## 7.0.0 - 2019-07-03 39 | 40 | - Changed: minimum required Node.js version is `8.9.0`. 41 | 42 | ## 6.0.1 - 2019-03-26 43 | 44 | - Chore: migrate to `ip-regex` to avoid vulnerable lodash libraries 45 | 46 | ## 6.0.0 - 2019-01-09 47 | 48 | - Chore: minimum required `node-fs` version is now `^7.0.1`. 49 | 50 | ### BREAKING CHANGE 51 | 52 | - Chore: minimum required `nodejs` version is now `>= 6.9.0`. 53 | 54 | ## 5.0.7 - 2018-05-22 55 | 56 | - Chore: minimum required `cosmiconfig` version is now `^5.0.0`. 57 | 58 | ## 5.0.6 - 2018-05-05 59 | 60 | - Fix: throw error when `policy` option is `null`. 61 | - Chore: minimum required `meow` version is now `^5.0.0`. 62 | - Chore: minimum required `node-fs` version is now `^6.0.0`. 63 | 64 | ## 5.0.5 - 2018-03-28 65 | 66 | - Fix: allow empty values for the `Disallow` directive (according to the [official spec](http://www.robotstxt.org/orig.html)). 67 | - Fix: don't generate empty `Clean-param`. 68 | 69 | ## 5.0.4 - 2018-01-16 70 | 71 | - Fix: typo in source code message errors.
72 | 73 | ## 5.0.3 - 2018-01-16 74 | 75 | - Chore: minimum required `cosmiconfig` version is now `^4.0.0`. 76 | 77 | ## 5.0.2 - 2017-12-12 78 | 79 | - Chore: minimum required `node-fs` version is now `^5.0.0`. 80 | 81 | ## 5.0.1 - 2017-11-28 82 | 83 | - Chore: minimum required `meow` version is now `^4.0.0`. 84 | 85 | ## 5.0.0 - 2017-11-15 86 | 87 | - Changed: use [cosmiconfig](https://github.com/davidtheclark/cosmiconfig) for 88 | loading configuration. 89 | - Feature: in CLI if the parent directory does not exist when you write `robots.txt`, it's created. 90 | 91 | ## 4.0.4 - 2017-10-09 92 | 93 | - Chore: update deps. 94 | 95 | ## 4.0.3 - 2017-03-13 96 | 97 | - Fixed: `is-absolute-url` package semver. 98 | 99 | ## 4.0.2 - 2016-12-30 100 | 101 | - Fixed: `host` option is now processed based on `URL`. 102 | - Fixed: thrown error if the `host` option being IP address. 103 | - Fixed: clarified error message on multiple and not string the `userAgent` 104 | option. 105 | - Fixed: `Host` directive is now not containing `80` port. 106 | - Fixed: thrown error if the `cleanParam` not string or array or if string is 107 | more than 500 characters. 108 | - Fixed: supported unicode characters in a `Allow` and a `Disallow` directives. 109 | - Fixed: thrown error if the `sitemap` option not an array or a string and not 110 | an absolute URL. 111 | 112 | ## 4.0.1 - 2016-10-27 113 | 114 | - Chore: added CI test on `node.js` version `7`. 115 | - Documentation: improve `README.md` and fix typos. 116 | 117 | ## 4.0.0 118 | 119 | - Added: `crawlDelay` to each `policy` item. 120 | - Added: `cleanParam` to each `policy` item (used only by the Yandex bot). 121 | - Chore: used `remark-preset-lint-itgalaxy` preset. 122 | - Chore: updated `devDependencies`. 123 | - Chore: updated copyright year in `LICENSE`. 124 | - Chore: improved tests. 125 | - Fixed: strict order directives for each `User-agent`. 126 | - Fixed: added newline after each `User-agent`.
127 | - Removed: `crawlDelay` from `options`. 128 | - Removed: `cleanParam` from `options`. 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014-present itgalaxy, inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /__tests__/__snapshots__/standalone.test.js.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`standalone config option 1`] = ` 4 | "User-agent: * 5 | Allow: / 6 | Host: some-some-domain.com 7 | " 8 | `; 9 | 10 | exports[`standalone should \`contain the \`Host\` 1`] = ` 11 | "User-agent: * 12 | Allow: / 13 | Host: domain.com 14 | " 15 | `; 16 | 17 | exports[`standalone should \`contain two \`Sitemap\` directives 1`] = ` 18 | "User-agent: * 19 | Allow: / 20 | Sitemap: http://foobar.com/sitemap.xml 21 | Sitemap: http://foobar.com/sitemap1.xml 22 | " 23 | `; 24 | 25 | exports[`standalone should \`contain two \`policy\` item with the \`Allow\` and the \`Disallow\` directives 1`] = ` 26 | "User-agent: Google 27 | Allow: / 28 | Disallow: /search-foo 29 | 30 | User-agent: Yandex 31 | Allow: / 32 | Disallow: /search-bar 33 | " 34 | `; 35 | 36 | exports[`standalone should \`contain two policy item, first have multiple \`User-agent\` option 1`] = ` 37 | "User-agent: Google 38 | User-agent: AnotherBot 39 | Allow: / 40 | Disallow: /search-foo 41 | 42 | User-agent: Yandex 43 | Allow: / 44 | Disallow: /search-bar 45 | " 46 | `; 47 | 48 | exports[`standalone should contain multiple \`User-agent\` and \`Crawl-delay\` 1`] = ` 49 | "User-agent: Google 50 | Allow: / 51 | Crawl-delay: 10 52 | 53 | User-agent: Yandex 54 | Allow: / 55 | Crawl-delay: 0.5 56 | " 57 | `; 58 | 59 | exports[`standalone should contain one \`policy\` item with the \`Allow\` directive 1`] = ` 60 | "User-agent: Google 61 | Allow: / 62 | " 63 | `; 64 | 65 | exports[`standalone should contain one \`policy\` items with the \`Allow\` directive 1`] = ` 66 | "User-agent: Google 67 | Allow: / 68 | Allow: /foobar 69 | " 70 | `; 71 | 72 | exports[`standalone should contain one \`policy\` items with the \`Disallow\` directive 1`] = ` 
73 | "User-agent: Google 74 | Disallow: / 75 | Disallow: /foobar 76 | " 77 | `; 78 | 79 | exports[`standalone should contain one policy item with one \`Clean-param\` option 1`] = ` 80 | "User-agent: Yandex 81 | Allow: / 82 | Clean-param: s /forum/showthread.php 83 | " 84 | `; 85 | 86 | exports[`standalone should contain one policy item with two \`Clean-params\` options 1`] = ` 87 | "User-agent: Yandex 88 | Allow: / 89 | Clean-param: s /forum/showthread.php 90 | Clean-param: ref /forum/showthread.php 91 | " 92 | `; 93 | 94 | exports[`standalone should contain one policy item without empty \`Clean-param\` option 1`] = ` 95 | "User-agent: Yandex 96 | Allow: / 97 | " 98 | `; 99 | 100 | exports[`standalone should contain the \`Host\` if \`host\` options without protocol scheme 1`] = ` 101 | "User-agent: * 102 | Allow: / 103 | Host: www.domain.com 104 | " 105 | `; 106 | 107 | exports[`standalone should contain the \`Host\` in punycode format 1`] = ` 108 | "User-agent: * 109 | Allow: / 110 | Host: xn----8sbalhasbh9ahbi6a2ae.xn--p1ai 111 | " 112 | `; 113 | 114 | exports[`standalone should contain the \`Host\` with \`https\` scheme 1`] = ` 115 | "User-agent: * 116 | Allow: / 117 | Host: https://domain.com 118 | " 119 | `; 120 | 121 | exports[`standalone should contain the \`Host\` without \`80\` port 1`] = ` 122 | "User-agent: * 123 | Allow: / 124 | Host: domain.com 125 | " 126 | `; 127 | 128 | exports[`standalone should contain the \`Host\` without a trailing slash 1`] = ` 129 | "User-agent: * 130 | Allow: / 131 | Host: domain.com 132 | " 133 | `; 134 | 135 | exports[`standalone should contain the \`Host\` without any extra URL entire 1`] = ` 136 | "User-agent: * 137 | Allow: / 138 | Host: www.domain.com:8080 139 | " 140 | `; 141 | 142 | exports[`standalone should contain the \`Sitemap\` directive 1`] = ` 143 | "User-agent: * 144 | Allow: / 145 | Sitemap: http://foobar.com/sitemap.xml 146 | " 147 | `; 148 | 149 | exports[`standalone should contain two \`policy\` item with 
the \`Allow\` directive 1`] = ` 150 | "User-agent: Google 151 | Allow: / 152 | 153 | User-agent: Yandex 154 | Allow: / 155 | " 156 | `; 157 | 158 | exports[`standalone should contain two \`policy\` items with empty \`Disallow\` directive 1`] = ` 159 | "User-agent: * 160 | Disallow: 161 | 162 | User-agent: Foo 163 | Disallow: 164 | " 165 | `; 166 | 167 | exports[`standalone should generated default output without options 1`] = ` 168 | "User-agent: * 169 | Allow: / 170 | " 171 | `; 172 | 173 | exports[`standalone should load a config file 1`] = ` 174 | "User-agent: * 175 | Allow: / 176 | Host: some-some-domain.com 177 | " 178 | `; 179 | 180 | exports[`standalone should throw error if item in the \`sitemap\` option not a string or an array 1`] = `"Item in \`sitemap\` option should be a string"`; 181 | 182 | exports[`standalone should throw error if item in the \`sitemap\` option not an absolute URL 1`] = `"Item in \`sitemap\` option should be an absolute URL"`; 183 | 184 | exports[`standalone should throw error if the \`Host\` option is array 1`] = `"Options \`host\` must be only one string"`; 185 | 186 | exports[`standalone should throw error if the \`cleanParam\` option more than 500 characters 1`] = `"Option \`cleanParam\` should have no more than 500 characters"`; 187 | 188 | exports[`standalone should throw error if the \`cleanParam\` option not string or array 1`] = `"Option \`cleanParam\` should be a string or an array"`; 189 | 190 | exports[`standalone should throw error if the \`host\` option being IP address version 4 1`] = `"Options \`host\` should be not an IP address"`; 191 | 192 | exports[`standalone should throw error if the \`host\` option being IP address version 6 1`] = `"Options \`host\` should be not an IP address"`; 193 | 194 | exports[`standalone should throw error if the \`policy\` option have array the \`userAgent\` option 1`] = `"Each \`policy\` should have a single string \`userAgent\` option"`; 195 | 196 | exports[`standalone should throw 
error if the \`policy\` option is null 1`] = `"Options \`policy\` should be define"`; 197 | 198 | exports[`standalone should throw error if the \`policy\` option is string 1`] = `"Options \`policy\` must be array"`; 199 | 200 | exports[`standalone should throw error if the \`policy\` option not have the \`userAgent\` option 1`] = `"Each \`policy\` should have a single string \`userAgent\` option"`; 201 | 202 | exports[`standalone should throw error if the \`sitemap\` option is not absolute URL 1`] = `"Option \`sitemap\` should be an absolute URL"`; 203 | 204 | exports[`standalone should throw error if the \`sitemap\` option is not string or array 1`] = `"Option \`sitemap\` should be a string or an array"`; 205 | 206 | exports[`standalone should throw error if the item in \`cleanParam\` option more than 500 characters 1`] = `"String in \`cleanParam\` option should have no more than 500 characters"`; 207 | 208 | exports[`standalone should throw error if the item in \`cleanParam\` option not string 1`] = `"String in \`cleanParam\` option should be a string"`; 209 | 210 | exports[`standalone should throw error on invalid \`crawlDelay\` option 1`] = `"Option \`crawlDelay\` must be an integer or a float"`; 211 | 212 | exports[`standalone should throw error on invalid \`host\` option 1`] = `"Option \`host\` does not contain correct host"`; 213 | 214 | exports[`standalone should use encode url in the \`allow\` and the \`disallow\` options 1`] = ` 215 | "User-agent: Google 216 | Allow: /%D0%BA%D0%BE%D1%80%D0%B7%D0%B8%D0%BD%D0%B0 217 | Disallow: /%D0%BB%D0%B8%D1%87%D0%BD%D1%8B%D0%B9-%D0%BA%D0%B0%D0%B1%D0%B8%D0%BD%D0%B5%D1%82 218 | " 219 | `; 220 | -------------------------------------------------------------------------------- /__tests__/fixtures/robots-txt.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | host: "http://some-some-domain.com", 3 | }; 4 | 
// __tests__/standalone.test.js
//
// Jest suite for the promise-returning `generateRobotstxt` entry point.
//
// NOTE: test titles are intentionally left untouched (including their odd
// backticks and grammar). Jest derives snapshot keys from the titles, so
// renaming a test would orphan its entry in
// `__snapshots__/standalone.test.js.snap`.
import path from "path";
import generateRobotstxt from "../src/standalone.js";

// Directory that holds `robots-txt.config.js`, used by the config tests.
const fixturesPath = path.join(__dirname, "fixtures");

describe("standalone", () => {
  it("should generated default output without options", async () => {
    await expect(generateRobotstxt()).resolves.toMatchSnapshot();
  });

  it("should contain one `policy` item with the `Allow` directive", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            userAgent: "Google",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain one `policy` items with the `Allow` directive", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: ["/", "/foobar"],
            userAgent: "Google",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain one `policy` items with the `Disallow` directive", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            disallow: ["/", "/foobar"],
            userAgent: "Google",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain two `policy` item with the `Allow` directive", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            userAgent: "Google",
          },
          {
            allow: "/",
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should `contain two `policy` item with the `Allow` and the `Disallow` directives", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            disallow: "/search-foo",
            userAgent: "Google",
          },
          {
            allow: "/",
            disallow: "/search-bar",
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should `contain two policy item, first have multiple `User-agent` option", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            disallow: "/search-foo",
            userAgent: ["Google", "AnotherBot"],
          },
          {
            allow: "/",
            disallow: "/search-bar",
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should use encode url in the `allow` and the `disallow` options", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/корзина",
            disallow: "/личный-кабинет",
            userAgent: "Google",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error if the `policy` option is string", async () => {
    await expect(
      generateRobotstxt({
        policy: "string",
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `policy` option is null", async () => {
    await expect(
      generateRobotstxt({
        policy: null,
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `policy` option not have the `userAgent` option", async () => {
    await expect(
      generateRobotstxt({
        policy: [{}],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `policy` option have array the `userAgent` option", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            userAgent: [],
          },
        ],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should contain the `Sitemap` directive", async () => {
    await expect(
      generateRobotstxt({
        sitemap: "http://foobar.com/sitemap.xml",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error if the `sitemap` option is not string or array", async () => {
    await expect(
      generateRobotstxt({
        sitemap: {},
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `sitemap` option is not absolute URL", async () => {
    await expect(
      generateRobotstxt({
        sitemap: "sitemap.xml",
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if item in the `sitemap` option not an absolute URL", async () => {
    await expect(
      generateRobotstxt({
        sitemap: ["sitemap.xml"],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if item in the `sitemap` option not a string or an array", async () => {
    await expect(
      generateRobotstxt({
        sitemap: [{}],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should `contain two `Sitemap` directives", async () => {
    await expect(
      generateRobotstxt({
        sitemap: [
          "http://foobar.com/sitemap.xml",
          "http://foobar.com/sitemap1.xml",
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should `contain the `Host`", async () => {
    await expect(
      generateRobotstxt({
        host: "http://domain.com",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain the `Host` without a trailing slash", async () => {
    await expect(
      generateRobotstxt({
        host: "http://domain.com/",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain the `Host` in punycode format", async () => {
    await expect(
      generateRobotstxt({
        host: "интернет-магазин.рф",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain the `Host` without `80` port", async () => {
    await expect(
      generateRobotstxt({
        host: "domain.com:80",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain the `Host` if `host` options without protocol scheme", async () => {
    await expect(
      generateRobotstxt({
        host: "www.domain.com",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error on invalid `host` option", async () => {
    await expect(
      generateRobotstxt({
        host: "?:foobar",
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `host` option being IP address version 4", async () => {
    await expect(
      generateRobotstxt({
        host: "127.0.0.1",
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `host` option being IP address version 6", async () => {
    await expect(
      generateRobotstxt({
        host: "0:0:0:0:0:0:7f00:1",
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should contain the `Host` with `https` scheme", async () => {
    await expect(
      generateRobotstxt({
        host: "https://domain.com",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain the `Host` without any extra URL entire", async () => {
    await expect(
      generateRobotstxt({
        host: "http://www.domain.com:8080/foo/bar/foobar.php?foo=bar#foobar",
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error if the `Host` option is array", async () => {
    await expect(
      generateRobotstxt({
        host: ["http://domain.com", "http://domain1.com"],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should contain multiple `User-agent` and `Crawl-delay`", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            crawlDelay: 10,
            userAgent: "Google",
          },
          {
            allow: "/",
            crawlDelay: 0.5,
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error on invalid `crawlDelay` option", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            crawlDelay: "foo",
            userAgent: "Google",
          },
        ],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should contain one policy item with one `Clean-param` option", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            cleanParam: "s /forum/showthread.php",
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain one policy item with two `Clean-params` options", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            cleanParam: [
              "s /forum/showthread.php",
              "ref /forum/showthread.php",
            ],
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error if the `cleanParam` option more than 500 characters", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            // `new Array(502).join("a")` yields a 501-character string,
            // i.e. one character over the 500-character limit.
            cleanParam: new Array(502).join("a"),
            userAgent: "Yandex",
          },
        ],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the item in `cleanParam` option more than 500 characters", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            cleanParam: [new Array(502).join("a")],
            userAgent: "Yandex",
          },
        ],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the `cleanParam` option not string or array", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            cleanParam: {},
            userAgent: "Yandex",
          },
        ],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("should throw error if the item in `cleanParam` option not string", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            cleanParam: [{}],
            userAgent: "Yandex",
          },
        ],
      })
    ).rejects.toThrowErrorMatchingSnapshot();
  });

  it("config option", async () => {
    await expect(
      generateRobotstxt({
        configFile: path.join(fixturesPath, "robots-txt.config.js"),
      })
    ).resolves.toMatchSnapshot();
  });

  it("should throw error if config don't found", async () => {
    await expect(
      generateRobotstxt({
        configFile: path.join(fixturesPath, "not-found.config.js"),
      })
    ).rejects.toThrow(/no such file or directory/);
  });

  it("should load a config file", async () => {
    const oldProcessCwd = process.cwd();

    process.chdir(fixturesPath);

    // Always restore the working directory, even when the call rejects or the
    // snapshot assertion throws. Otherwise the cwd would stay inside
    // `fixtures` for every test that runs afterwards in this process.
    try {
      const context = await generateRobotstxt();

      expect(context).toMatchSnapshot();
    } finally {
      process.chdir(oldProcessCwd);
    }
  });

  it("should contain two `policy` items with empty `Disallow` directive", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "",
            disallow: "",
            userAgent: "*",
          },
          {
            allow: "",
            disallow: [""],
            userAgent: "Foo",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });

  it("should contain one policy item without empty `Clean-param` option", async () => {
    await expect(
      generateRobotstxt({
        policy: [
          {
            allow: "/",
            cleanParam: [],
            userAgent: "Yandex",
          },
        ],
      })
    ).resolves.toMatchSnapshot();
  });
});
-------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const MIN_BABEL_VERSION = 7; 4 | 5 | module.exports = (api) => { 6 | api.assertVersion(MIN_BABEL_VERSION); 7 | api.cache(true); 8 | 9 | return { 10 | presets: [ 11 | [ 12 | "@babel/preset-env", 13 | { 14 | targets: { 15 | node: "10.13.0", 16 | }, 17 | }, 18 | ], 19 | ], 20 | plugins: [ 21 | [ 22 | "add-module-exports", 23 | { 24 | addDefaultProperty: true, 25 | }, 26 | ], 27 | ], 28 | }; 29 | }; 30 | -------------------------------------------------------------------------------- /husky.config.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | module.exports = { 4 | hooks: { 5 | "pre-commit": "lint-staged", 6 | }, 7 | }; 8 | -------------------------------------------------------------------------------- /lint-staged.config.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | module.exports = { 4 | "*.{js,cjs,mjs,jsx,ts,tsx}": [ 5 | "prettier --list-different", 6 | "eslint --report-unused-disable-directives", 7 | "git add", 8 | ], 9 | "!(CHANGELOG).{md,markdown,mdown,mkdn,mkd,mdwn,mkdown,ron}": [ 10 | "prettier --list-different", 11 | "remark -f -q", 12 | "git add", 13 | ], 14 | "*.{yml,yaml}": ["prettier --list-different", "git add"], 15 | }; 16 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "generate-robotstxt", 3 | "version": "8.0.3", 4 | "description": "Awesome generator robots.txt", 5 | "author": "itgalaxy ", 6 | "contributors": [ 7 | { 8 | "name": "Alexander Krasnoyarov", 9 | "email": "alexander.krasnoyarov@itgalaxy.company", 10 | "url": "https://vk.com/sterling_archer" 11 | } 12 | ], 13 | 
"repository": { 14 | "type": "https", 15 | "url": "https://github.com/itgalaxy/generate-robotstxt" 16 | }, 17 | "keywords": [ 18 | "robotstxt", 19 | "robots.txt", 20 | "generate", 21 | "robots-txt", 22 | "user-agent", 23 | "police", 24 | "allow", 25 | "disallow", 26 | "crawl-delay", 27 | "robot", 28 | "robots" 29 | ], 30 | "bugs": { 31 | "url": "https://github.com/itgalaxy/generate-robotstxt/issues" 32 | }, 33 | "main": "dist/index.js", 34 | "bin": "dist/cli.js", 35 | "files": [ 36 | "dist" 37 | ], 38 | "license": "MIT", 39 | "engines": { 40 | "node": ">= 10.13.0" 41 | }, 42 | "dependencies": { 43 | "cosmiconfig": "^6.0.0", 44 | "fs-extra": "^9.0.0", 45 | "ip-regex": "^4.1.0", 46 | "is-absolute-url": "^3.0.3", 47 | "meow": "^7.0.1", 48 | "resolve-from": "^5.0.0" 49 | }, 50 | "devDependencies": { 51 | "@babel/cli": "^7.8.4", 52 | "@babel/core": "^7.9.6", 53 | "@babel/preset-env": "^7.9.6", 54 | "babel-eslint": "^10.1.0", 55 | "babel-jest": "^26.0.1", 56 | "babel-plugin-add-module-exports": "^1.0.2", 57 | "eslint": "^7.0.0", 58 | "eslint-plugin-ava": "^10.3.0", 59 | "eslint-plugin-html": "^6.0.2", 60 | "eslint-plugin-import": "^2.20.2", 61 | "eslint-plugin-itgalaxy": "^126.0.0", 62 | "eslint-plugin-jest": "^23.10.0", 63 | "eslint-plugin-jsx-a11y": "^6.2.3", 64 | "eslint-plugin-lodash": "^7.1.0", 65 | "eslint-plugin-markdown": "^1.0.2", 66 | "eslint-plugin-node": "^11.1.0", 67 | "eslint-plugin-promise": "^4.2.1", 68 | "eslint-plugin-react": "^7.19.0", 69 | "eslint-plugin-unicorn": "^19.0.1", 70 | "execa": "^4.0.1", 71 | "husky": "^4.2.5", 72 | "jest": "^26.0.1", 73 | "lint-staged": "^10.2.2", 74 | "npm-run-all": "^4.1.5", 75 | "prettier": "^2.0.5", 76 | "remark-cli": "^8.0.0", 77 | "remark-preset-lint-itgalaxy": "^15.0.0", 78 | "rimraf": "^3.0.2", 79 | "standard-version": "^8.0.0", 80 | "tempfile": "^3.0.0" 81 | }, 82 | "scripts": { 83 | "prebuild": "rimraf dist", 84 | "build": "babel src --out-dir dist --ignore /__tests__/", 85 | "lint:prettier": "prettier 
--list-different '{src,__tests__}/**/*.{js,mjs,jsx,md,yml,yaml}' '*.{js,mjs,md,yml,yaml}' '!CHANGELOG.md'", 86 | "lint:js": "eslint . --cache --report-unused-disable-directives --ignore-path .gitignore --ext '.js,.mjs,.jsx,md'", 87 | "lint:md": "remark . -i .gitignore -f -q", 88 | "lint": "npm-run-all -l -p 'lint:**'", 89 | "prettier": "npm run lint:prettier -- --write", 90 | "fix:js": "npm run lint:js -- --fix", 91 | "fix": "npm-run-all -l prettier -p \"fix:**\"", 92 | "pretest": "npm run lint", 93 | "test:only": "jest", 94 | "test:coverage": "jest --coverage", 95 | "test": "npm run test:only", 96 | "prepare": "npm run build", 97 | "release": "standard-version" 98 | }, 99 | "jest": { 100 | "testPathIgnorePatterns": [ 101 | "/node_modules/", 102 | "/fixtures/", 103 | "helpers.js" 104 | ] 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # generate-robotstxt 2 | 3 | [![NPM version](https://img.shields.io/npm/v/generate-robotstxt.svg)](https://www.npmjs.org/package/generate-robotstxt) 4 | [![Travis Build Status](https://img.shields.io/travis/itgalaxy/generate-robotstxt/master.svg?label=build)](https://travis-ci.org/itgalaxy/generate-robotstxt) 5 | [![dependencies Status](https://david-dm.org/itgalaxy/generate-robotstxt/status.svg)](https://david-dm.org/itgalaxy/generate-robotstxt) 6 | [![devDependencies Status](https://david-dm.org/itgalaxy/generate-robotstxt/dev-status.svg)](https://david-dm.org/itgalaxy/generate-robotstxt?type=dev) 7 | 8 | Awesome generator robots.txt. 
9 | 10 | ## Installation 11 | 12 | ```shell 13 | npm install --save-dev generate-robotstxt 14 | ``` 15 | 16 | ## Usage 17 | 18 | ```js 19 | import robotstxt from "generate-robotstxt"; 20 | 21 | robotstxt({ 22 | policy: [ 23 | { 24 | userAgent: "Googlebot", 25 | allow: "/", 26 | disallow: "/search", 27 | crawlDelay: 2, 28 | }, 29 | { 30 | userAgent: "OtherBot", 31 | allow: ["/allow-for-all-bots", "/allow-only-for-other-bot"], 32 | disallow: ["/admin", "/login"], 33 | crawlDelay: 2, 34 | }, 35 | { 36 | userAgent: "*", 37 | allow: "/", 38 | disallow: "/search", 39 | crawlDelay: 10, 40 | cleanParam: "ref /articles/", 41 | }, 42 | ], 43 | sitemap: "http://example.com/sitemap.xml", 44 | host: "http://example.com", 45 | }) 46 | .then((content) => { 47 | console.log(content); 48 | 49 | return content; 50 | }) 51 | .catch((error) => { 52 | throw error; 53 | }); 54 | ``` 55 | 56 | ## File based configuration 57 | 58 | **robots-txt.config.js** 59 | 60 | ```js 61 | module.exports = { 62 | policy: [ 63 | { 64 | userAgent: "Googlebot", 65 | allow: "/", 66 | disallow: ["/search"], 67 | crawlDelay: 2, 68 | }, 69 | { 70 | userAgent: "OtherBot", 71 | allow: ["/allow-for-all-bots", "/allow-only-for-other-bot"], 72 | disallow: ["/admin", "/login"], 73 | crawlDelay: 2, 74 | }, 75 | { 76 | userAgent: "*", 77 | allow: "/", 78 | disallow: "/search", 79 | crawlDelay: 10, 80 | cleanParam: "ref /articles/", 81 | }, 82 | ], 83 | sitemap: "http://example.com/sitemap.xml", 84 | host: "http://example.com", 85 | }; 86 | ``` 87 | 88 | ## CLI 89 | 90 | ```shell 91 | Awesome generator robots.txt 92 | 93 | Usage generate-robotstxt [options] <dest> 94 | 95 | Options: 96 | --config Path to a specific configuration file. 97 | ``` 98 | 99 | ## Contribution 100 | 101 | Feel free to push your code if you agree with publishing under the MIT license. 
102 | 103 | ## [Changelog](CHANGELOG.md) 104 | 105 | ## [License](LICENSE) 106 | -------------------------------------------------------------------------------- /src/cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import path from "path"; 4 | import fs from "fs-extra"; 5 | import meow from "meow"; 6 | import resolveFrom from "resolve-from"; 7 | import standalone from "./standalone.js"; 8 | 9 | const cli = meow( 10 | ` 11 | Usage generate-robotstxt [options] 12 | 13 | Options: 14 | --config Path to a specific configuration file. 15 | `, 16 | { 17 | flags: { 18 | config: { 19 | type: "string", 20 | }, 21 | }, 22 | } 23 | ); 24 | 25 | const optionsBase = {}; 26 | 27 | if (cli.flags.config) { 28 | // Should check these possibilities: 29 | // a. name of a node_module 30 | // b. absolute path 31 | // c. relative path relative to `process.cwd()`. 32 | // If none of the above work, we'll try a relative path starting 33 | // in `process.cwd()`. 
34 | optionsBase.configFile = 35 | resolveFrom(process.cwd(), cli.flags.config) || 36 | path.join(process.cwd(), cli.flags.config); 37 | } 38 | 39 | Promise.resolve() 40 | .then(() => Object.assign({}, optionsBase)) 41 | .then((options) => standalone(options)) 42 | .then((output) => { 43 | if (cli.input.length === 0) { 44 | throw new Error("Require `dest` argument"); 45 | } 46 | 47 | const dest = path.resolve(cli.input.pop()); 48 | 49 | return Promise.resolve().then(() => fs.outputFile(dest, output)); 50 | }) 51 | .catch((error) => { 52 | console.log(error); // eslint-disable-line no-console 53 | process.exit(error.code || 1); 54 | }); 55 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import standalone from "./standalone.js"; 2 | 3 | export default standalone; 4 | -------------------------------------------------------------------------------- /src/standalone.js: -------------------------------------------------------------------------------- 1 | import path from "path"; 2 | import url from "url"; 3 | import ipRegex from "ip-regex"; 4 | import { cosmiconfig } from "cosmiconfig"; 5 | import isAbsoluteUrl from "is-absolute-url"; 6 | 7 | function capitaliseFirstLetter(string) { 8 | return string.charAt(0).toUpperCase() + string.slice(1); 9 | } 10 | 11 | function addLine(name, rule) { 12 | let contents = ""; 13 | 14 | if (rule && Array.isArray(rule) && rule.length > 0) { 15 | rule.forEach((item) => { 16 | contents += addLine(name, item); 17 | }); 18 | } else { 19 | const ruleContent = (name === "Allow" || name === "Disallow" 20 | ? encodeURI(rule) 21 | : rule 22 | ).toString(); 23 | 24 | contents += `${capitaliseFirstLetter( 25 | name.replace(/([a-z])([A-Z])/g, "$1-$2").toLowerCase() 26 | )}:${ruleContent.length > 0 ? 
` ${ruleContent}` : ""}\n`; 27 | } 28 | 29 | return contents; 30 | } 31 | 32 | function generatePoliceItem(item, index) { 33 | let contents = ""; 34 | 35 | if (index !== 0) { 36 | contents += "\n"; 37 | } 38 | 39 | contents += addLine("User-agent", item.userAgent); 40 | 41 | if (item.allow) { 42 | contents += addLine("Allow", item.allow); 43 | } 44 | 45 | if (typeof item.disallow === "string" || Array.isArray(item.disallow)) { 46 | contents += addLine("Disallow", item.disallow); 47 | } 48 | 49 | if (item.crawlDelay) { 50 | contents += addLine("Crawl-delay", item.crawlDelay); 51 | } 52 | 53 | // Move from policy for next master version 54 | // https://yandex.ru/support/webmaster/controlling-robot/robots-txt.html 55 | if (item.cleanParam && item.cleanParam.length > 0) { 56 | contents += addLine("Clean-param", item.cleanParam); 57 | } 58 | 59 | return contents; 60 | } 61 | 62 | function buildConfig(configFile = null) { 63 | let searchPath = process.cwd(); 64 | let configPath = null; 65 | 66 | if (configFile) { 67 | searchPath = null; 68 | configPath = path.resolve(process.cwd(), configFile); 69 | } 70 | 71 | const configExplorer = cosmiconfig("robots-txt"); 72 | const searchForConfig = configPath 73 | ? 
configExplorer.load(configPath) 74 | : configExplorer.search(searchPath); 75 | 76 | return searchForConfig.then((result) => { 77 | if (!result) { 78 | return {}; 79 | } 80 | 81 | return result; 82 | }); 83 | } 84 | 85 | export default function ({ 86 | configFile = null, 87 | policy = [ 88 | { 89 | allow: "/", 90 | cleanParam: null, 91 | crawlDelay: null, 92 | userAgent: "*", 93 | }, 94 | ], 95 | sitemap = null, 96 | host = null, 97 | } = {}) { 98 | let options = { 99 | host, 100 | policy, 101 | sitemap, 102 | }; 103 | 104 | return Promise.resolve() 105 | .then(() => 106 | buildConfig(configFile).then((result) => { 107 | // Need avoid this behaviour in next major release 108 | // Load config file when it is passed or options were set 109 | options = Object.assign({}, options, result.config); 110 | 111 | return options; 112 | }) 113 | ) 114 | .then( 115 | () => 116 | new Promise((resolve) => { 117 | if (options.policy) { 118 | if (!Array.isArray(options.policy)) { 119 | throw new Error("Options `policy` must be array"); 120 | } 121 | 122 | options.policy.forEach((item) => { 123 | if (!item.userAgent || item.userAgent.length === 0) { 124 | throw new Error( 125 | "Each `policy` should have a single string `userAgent` option" 126 | ); 127 | } 128 | 129 | if ( 130 | item.crawlDelay && 131 | typeof item.crawlDelay !== "number" && 132 | !Number.isFinite(item.crawlDelay) 133 | ) { 134 | throw new Error( 135 | "Option `crawlDelay` must be an integer or a float" 136 | ); 137 | } 138 | 139 | if (item.cleanParam) { 140 | if ( 141 | typeof item.cleanParam === "string" && 142 | item.cleanParam.length > 500 143 | ) { 144 | throw new Error( 145 | "Option `cleanParam` should have no more than 500 characters" 146 | ); 147 | } else if (Array.isArray(item.cleanParam)) { 148 | item.cleanParam.forEach((subItem) => { 149 | if (typeof subItem === "string" && subItem.length > 500) { 150 | throw new Error( 151 | "String in `cleanParam` option should have no more than 500 characters" 152 | ); 
153 | } else if (typeof subItem !== "string") { 154 | throw new Error( 155 | "String in `cleanParam` option should be a string" 156 | ); 157 | } 158 | }); 159 | } else if ( 160 | typeof item.cleanParam !== "string" && 161 | !Array.isArray(item.cleanParam) 162 | ) { 163 | throw new Error( 164 | "Option `cleanParam` should be a string or an array" 165 | ); 166 | } 167 | } 168 | }); 169 | } else { 170 | throw new Error("Options `policy` should be define"); 171 | } 172 | 173 | if (options.sitemap) { 174 | if ( 175 | typeof options.sitemap === "string" && 176 | !isAbsoluteUrl(options.sitemap) 177 | ) { 178 | throw new Error("Option `sitemap` should be an absolute URL"); 179 | } else if (Array.isArray(options.sitemap)) { 180 | options.sitemap.forEach((item) => { 181 | if (typeof item === "string" && !isAbsoluteUrl(item)) { 182 | throw new Error( 183 | "Item in `sitemap` option should be an absolute URL" 184 | ); 185 | } else if (typeof item !== "string") { 186 | throw new Error( 187 | "Item in `sitemap` option should be a string" 188 | ); 189 | } 190 | }); 191 | } else if ( 192 | typeof options.sitemap !== "string" && 193 | !Array.isArray(options.sitemap) 194 | ) { 195 | throw new Error( 196 | "Option `sitemap` should be a string or an array" 197 | ); 198 | } 199 | } 200 | 201 | if (options.host) { 202 | if (typeof options.host !== "string") { 203 | throw new Error("Options `host` must be only one string"); 204 | } 205 | 206 | if (ipRegex({ exact: true }).test(options.host)) { 207 | throw new Error("Options `host` should be not an IP address"); 208 | } 209 | } 210 | 211 | let contents = ""; 212 | 213 | options.policy.forEach((item, index) => { 214 | contents += generatePoliceItem(item, index); 215 | }); 216 | 217 | if (options.sitemap) { 218 | contents += addLine("Sitemap", options.sitemap); 219 | } 220 | 221 | if (options.host) { 222 | let normalizeHost = options.host; 223 | 224 | if (normalizeHost.search(/^https?:\/\//) === -1) { 225 | normalizeHost = `http://${host}`; 
226 | } 227 | 228 | // eslint-disable-next-line node/no-deprecated-api 229 | const parsedURL = url.parse(normalizeHost, false, true); 230 | 231 | if (!parsedURL.host) { 232 | throw new Error("Option `host` does not contain correct host"); 233 | } 234 | 235 | let formattedHost = url.format({ 236 | host: 237 | parsedURL.port && parsedURL.port === "80" 238 | ? parsedURL.hostname 239 | : parsedURL.host, 240 | port: 241 | parsedURL.port && parsedURL.port === "80" ? "" : parsedURL.port, 242 | protocol: parsedURL.protocol, 243 | }); 244 | 245 | formattedHost = formattedHost.replace(/^http:\/\//, ""); 246 | 247 | contents += addLine("Host", formattedHost); 248 | } 249 | 250 | return resolve(contents); 251 | }) 252 | ); 253 | } 254 | --------------------------------------------------------------------------------