├── _config.php ├── .gitattributes ├── _config └── routes.yml ├── CODE_OF_CONDUCT.md ├── .editorconfig ├── composer.json ├── .scrutinizer.yml ├── README.md └── src └── Robots.php /_config.php: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Requirements 18 | 19 | * Silverstripe 6 20 | 21 | ## Installation Instructions 22 | 23 | * Extract all files into the 'robots' folder under your Silverstripe root, or install using composer 24 | 25 | ```bash 26 | composer require "tractorcow/silverstripe-robots:^5" 27 | ``` 28 | 29 | * Make sure you are correctly setting your environment for it to work properly 30 | * It's also advisable to either install the googlesitemaps module, or to create a physical `sitemap.xml` in your site root. 31 | 32 | ## Configuration 33 | 34 | You can add a page or pattern to be blocked by adding it to the `disallowed_urls` configuration 35 | 36 | ```yaml 37 | --- 38 | Name: myrobotsconfiguration 39 | --- 40 | TractorCow\Robots\Robots: 41 | disallowed_urls: 42 | - 'mysecretpage.html' 43 | - '_private' 44 | - 'Documents-and-Settings/Ricky/My-Documents/faxes/sent-faxes' 45 | ``` 46 | 47 | By default, any page with 'ShowInSearch' set to false will also be excluded. This 48 | can be useful for hiding auxiliary pages like "thanks for signing up", or error pages. 49 | 50 | You can turn this off (if you really absolutely think you need to) using the configuration below 51 | 52 | ```yaml 53 | --- 54 | Name: myrobotsconfiguration 55 | --- 56 | TractorCow\Robots\Robots: 57 | disallow_unsearchable: false 58 | ``` 59 | 60 | By default the module will check for a sitemap file in `/sitemap.xml`, or will assume 61 | one is there if the googlesitemaps module is installed. You can set a custom file location 62 | using the configuration below. 63 | 64 | ```yaml 65 | --- 66 | Name: myrobotsconfiguration 67 | --- 68 | TractorCow\Robots\Robots: 69 | sitemap: '/sitemap_index.xml' 70 | ``` 71 | 72 | ## Need more help? 
73 | 74 | Message or email me at damian.mooyman@gmail.com or, well, read the code! 75 | 76 | ## License 77 | 78 | Copyright (c) 2013, Damian Mooyman 79 | All rights reserved. 80 | 81 | All rights reserved. 82 | 83 | Redistribution and use in source and binary forms, with or without 84 | modification, are permitted provided that the following conditions are met: 85 | 86 | * Redistributions of source code must retain the above copyright 87 | notice, this list of conditions and the following disclaimer. 88 | * Redistributions in binary form must reproduce the above copyright 89 | notice, this list of conditions and the following disclaimer in the 90 | documentation and/or other materials provided with the distribution. 91 | * The name of Damian Mooyman may not be used to endorse or promote products 92 | derived from this software without specific prior written permission. 93 | 94 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 95 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 96 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 97 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY 98 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 99 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 100 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 101 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 102 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 103 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
104 | -------------------------------------------------------------------------------- /src/Robots.php: -------------------------------------------------------------------------------- 1 | renderSitemap(); 69 | $text .= "User-agent: *\n"; 70 | $text .= $this->renderDisallow(); 71 | $text .= $this->renderAllow(); 72 | 73 | $response = new HTTPResponse($text, 200); 74 | $response->addHeader("Content-Type", "text/plain; charset=\"utf-8\""); 75 | return $response; 76 | } 77 |
    /**
     * Renders the sitemap link reference.
     *
     * Produces an empty string when the site is not public, when no `sitemap`
     * path is configured, or when the GoogleSitemap class is not loaded and
     * Director::fileExists() cannot find the configured file.
     *
     * @return string "Sitemap: <absolute url>\n", or an empty string
     */
    protected function renderSitemap()
    {
        // No sitemap if not public
        if (!$this->isPublic()) {
            return '';
        }

        // Check if sitemap is configured
        $sitemap = static::config()->get('sitemap');
        if (empty($sitemap)) {
            return '';
        }

        // When the GoogleSitemap class exists the configured path is reported
        // without a file check; otherwise the file itself must be present.
        if (!class_exists(GoogleSitemap::class) && !Director::fileExists($sitemap)) {
            return '';
        }

        // Report the sitemap location
        return sprintf("Sitemap: %s\n", Director::absoluteURL($sitemap));
    }

    /**
     * Renders the list of disallowed pages.
     *
     * @return string One "Disallow: <url>\n" line per entry of disallowedUrls()
     */
    protected function renderDisallow()
    {
        $text = '';
        foreach ($this->disallowedUrls() as $url) {
            $text .= sprintf("Disallow: %s\n", $url);
        }
        return $text;
    }

    /**
     * Renders the list of allowed pages, if any.
     *
     * @return string One "Allow: <url>\n" line per entry of allowedUrls()
     */
    protected function renderAllow()
    {
        $text = '';
        foreach ($this->allowedUrls() as $url) {
            $text .= sprintf("Allow: %s\n", $url);
        }
        return $text;
    }

    /**
     * Returns an array of disallowed URLs.
     *
     * For a non-public site the result is just ["/"]. Otherwise it is the
     * configured `disallowed_urls`, plus (when `disallow_unsearchable` is set)
     * the link of every SiteTree page with ShowInSearch = false, after the
     * `updateDisallowedUrls` extension hook has had a chance to modify the list.
     *
     * @return array
     */
    protected function disallowedUrls()
    {
        // If not public, disallow all
        if (!$this->isPublic()) {
            return ["/"];
        }

        // Get configured disallowed urls
        $urls = (array)static::config()->get('disallowed_urls');

        // Add all pages where ShowInSearch is false
        if (static::config()->get('disallow_unsearchable')) {
            /** @var SiteTree[] $unsearchablePages */
            $unsearchablePages = SiteTree::get()->filter(['ShowInSearch' => false]);

            // Redirector pages are left out of the list when that class is installed
            $redirectorClass = 'SilverStripe\CMS\Model\RedirectorPage';
            if (class_exists($redirectorClass)) {
                $unsearchablePages = $unsearchablePages->exclude('ClassName', $redirectorClass);
            }

            foreach ($unsearchablePages as $page) {
                $link = $page->Link();

                // Don't disallow home page
                if ($link !== '/') {
                    $urls[] = $link;
                }
            }
        }

        // Extensions receive the list by reference and may add or remove entries
        $this->extend('updateDisallowedUrls', $urls);
        return array_unique($urls);
    }

    /**
     * Returns an array of allowed URLs.
     *
     * Nothing is allowed while the site is not public: disallowedUrls() answers
     * with "/" in that case, and an "Allow" rule emitted next to "Disallow: /"
     * would re-open the listed paths, because crawlers apply the most specific
     * matching rule.
     *
     * @return array The configured `allowed_urls`, or an empty array when not public
     */
    protected function allowedUrls()
    {
        // Keep the "disallow all" guarantee of non-public sites intact
        if (!$this->isPublic()) {
            return [];
        }

        return (array)static::config()->get('allowed_urls');
    }
}
181 | --------------------------------------------------------------------------------