├── .gitignore ├── LICENSE ├── README.md ├── bombs ├── 10G.gzip ├── 1G.gzip ├── 1T.gzip └── README.md ├── nobots.go └── nobots_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | Mac OS X 2 | .DS_Store 3 | 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.dll 7 | *.so 8 | *.dylib 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 17 | .glide/ 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jaume Martin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NoBots 2 | Caddy Server plugin to protect your website against web crawlers and bots. 3 | 4 | ## Usage 5 | The directive for the Caddyfile is really simple. First, you have to place the bomb path next to the `nobots` keyword, for example `bomb.gz` in the example below. 6 | 7 | Then you can specify user agents either as strings or as regular expressions. When using regular expressions, you must add the `regexp` keyword in front of the regex. Plain strings must match the request's User-Agent exactly, while regular expressions may match any part of it. 8 | 9 | Caddyfile example: 10 | 11 | ``` 12 | nobots "bomb.gz" { 13 | "Googlebot/2.1 (+http://www.googlebot.com/bot.html)" 14 | "DuckDuckBot" 15 | regexp "^[Bb]ot" 16 | regexp "bingbot" 17 | } 18 | ``` 19 | 20 | There is another keyword that is useful in case you want to allow crawlers and bots to navigate through specific parts of your website. The keyword is `public` and its values are regular expressions, so you can use it as follows: 21 | 22 | ``` 23 | nobots "bomb.gz" { 24 | "Googlebot/2.1 (+http://www.googlebot.com/bot.html)" 25 | public "^/public" 26 | public "^/[a-z]{0,5}/public" 27 | } 28 | ``` 29 | 30 | The above example will send the bomb to the listed bots on all URIs except those that match `^/public` and `^/[a-z]{0,5}/public`. 31 | 32 | NOTE: By default, no URI is public, so the bomb is served on all URIs. 33 | 34 | 35 | ## How to create a bomb 36 | The bomb is not provided within the plugin so you have to create one. On Linux this is really easy; you can use the following commands. 
37 | 38 | ``` 39 | dd if=/dev/zero bs=1M count=1024 | gzip > 1G.gzip 40 | dd if=/dev/zero bs=1M count=10240 | gzip > 10G.gzip 41 | dd if=/dev/zero bs=1M count=1048576 | gzip > 1T.gzip 42 | ``` 43 | 44 | To optimize the final bomb you may compress the parts several times: 45 | 46 | ``` 47 | cat 10G.gzip | gzip > 10G.gzipx2 48 | cat 1T.gzip | gzip | gzip | gzip > 1T.gzipx4 49 | ``` 50 | *NOTE*: The extension `.gzipx2` or `.gzipx4` is only to highlight how many times the file was compressed. 51 | 52 | [![ko-fi](https://www.ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/M4M625UW0) 53 | -------------------------------------------------------------------------------- /bombs/10G.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xumeiquer/nobots/09962093db7688f00a3d83daf96a4940b980d798/bombs/10G.gzip -------------------------------------------------------------------------------- /bombs/1G.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xumeiquer/nobots/09962093db7688f00a3d83daf96a4940b980d798/bombs/1G.gzip -------------------------------------------------------------------------------- /bombs/1T.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xumeiquer/nobots/09962093db7688f00a3d83daf96a4940b980d798/bombs/1T.gzip -------------------------------------------------------------------------------- /bombs/README.md: -------------------------------------------------------------------------------- 1 | BOMBS 2 | ===== 3 | 4 | **DANGER**: The files listed in this directory could freeze your browser or even your computer. 5 | 6 | ## Bombs 7 | * 1G.gzip (2,7Kb) is a 1Gb bomb compressed once. 8 | * 10G.gzip (26Kb) is a 10Gb bomb compressed twice. 9 | * 1T.gzip (45Kb) is a 1Tb bomb compressed four times. 
10 | 11 | 12 | -------------------------------------------------------------------------------- /nobots.go: -------------------------------------------------------------------------------- 1 | package nobots 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "net/http" 7 | "os" 8 | "regexp" 9 | 10 | "github.com/caddyserver/caddy" 11 | "github.com/caddyserver/caddy/caddyhttp/httpserver" 12 | ) 13 | 14 | // botUA config representation 15 | type botUA struct { 16 | uas []string 17 | bomb string 18 | re []*regexp.Regexp 19 | public []*regexp.Regexp 20 | } 21 | 22 | // BotUA plugin struct 23 | type BotUA struct { 24 | Next httpserver.Handler 25 | UA *botUA 26 | } 27 | 28 | func init() { 29 | caddy.RegisterPlugin("nobots", caddy.Plugin{ 30 | ServerType: "http", 31 | Action: setup, 32 | }) 33 | } 34 | 35 | // setup callback for parsing the config 36 | func setup(c *caddy.Controller) error { 37 | ua, err := parseUA(c) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | // Verfies whether bomb exist 43 | if _, err := os.Stat(ua.bomb); os.IsNotExist(err) { 44 | return fmt.Errorf("Bomb %s not found.", ua.bomb) 45 | } 46 | 47 | // Setup de middleware 48 | cfg := httpserver.GetConfig(c) 49 | mid := func(next httpserver.Handler) httpserver.Handler { 50 | return BotUA{Next: next, UA: ua} 51 | } 52 | cfg.AddMiddleware(mid) 53 | 54 | return nil 55 | } 56 | 57 | // parseUA propper config parser that generates a botUA object 58 | func parseUA(c *caddy.Controller) (*botUA, error) { 59 | var ua botUA 60 | for c.Next() { 61 | if !c.NextArg() { 62 | return nil, c.ArgErr() 63 | } 64 | ua.bomb = c.Val() 65 | for c.NextBlock() { 66 | switch c.Val() { 67 | case "regexp": 68 | if !c.NextArg() { 69 | return nil, c.ArgErr() 70 | } 71 | re, err := regexp.Compile(c.Val()) 72 | if err != nil { 73 | return nil, fmt.Errorf("%s", err) 74 | } 75 | ua.re = append(ua.re, re) 76 | case "public": 77 | if !c.NextArg() { 78 | return nil, c.ArgErr() 79 | } 80 | re, err := regexp.Compile(c.Val()) 81 | if err != 
nil { 82 | return nil, fmt.Errorf("%s", err) 83 | } 84 | ua.public = append(ua.public, re) 85 | default: 86 | ua.uas = append(ua.uas, c.Val()) 87 | } 88 | } 89 | } 90 | return &ua, nil 91 | } 92 | 93 | func (b BotUA) ServeHTTP(w http.ResponseWriter, r *http.Request) (int, error) { 94 | // Get request UA 95 | rua := r.UserAgent() 96 | 97 | // Avoid ban UA for public URI 98 | if !b.IsPublicURI(r.URL.Path) { 99 | // Check if the UA is a evil one 100 | if b.IsEvil(rua) { 101 | return serveBomb(w, r, b.UA.bomb) 102 | } 103 | } 104 | // Nothing happens carry on with next stuff 105 | return b.Next.ServeHTTP(w, r) 106 | } 107 | 108 | // IsEvil check the remote UA against evil UAs 109 | func (b BotUA) IsEvil(rua string) bool { 110 | // In case there are regexp 111 | if len(b.UA.re) > 0 { 112 | for _, re := range b.UA.re { 113 | if re.MatchString(rua) { 114 | return true 115 | } 116 | } 117 | } 118 | // In case there are strings 119 | if len(b.UA.uas) > 0 { 120 | for _, ua := range b.UA.uas { 121 | if ua == rua { 122 | return true 123 | } 124 | } 125 | } 126 | // UA is not evil 127 | return false 128 | } 129 | 130 | // IsPublicURI check if the requested URI is defined as public or not 131 | func (b BotUA) IsPublicURI(uri string) bool { 132 | if len(b.UA.public) > 0 { 133 | for _, re := range b.UA.public { 134 | if re.MatchString(uri) { 135 | return true 136 | } 137 | } 138 | } 139 | return false 140 | } 141 | 142 | // serveBomb provides the bomb to front-end 143 | func serveBomb(w http.ResponseWriter, r *http.Request, bomb string) (int, error) { 144 | file, err := ioutil.ReadFile(bomb) 145 | if err != nil { 146 | return http.StatusNotFound, nil 147 | } 148 | 149 | w.Header().Set("Content-Type", "text/html; charset=UTF-8") 150 | w.Header().Set("Content-Encoding", "gzip") 151 | w.Header().Set("Content-Length", fmt.Sprintf("%d", len(file))) 152 | w.Write(file) 153 | return 200, nil 154 | } 155 | -------------------------------------------------------------------------------- 
/nobots_test.go: -------------------------------------------------------------------------------- 1 | package nobots 2 | 3 | import ( 4 | "io/ioutil" 5 | "net/http" 6 | "net/http/httptest" 7 | "regexp" 8 | "strconv" 9 | "testing" 10 | 11 | "github.com/caddyserver/caddy" 12 | "github.com/caddyserver/caddy/caddyhttp/httpserver" 13 | ) 14 | 15 | var t1 = `nobots "nobots.go" { "Googlebot" }` 16 | 17 | func TestSetup(t *testing.T) { 18 | c := caddy.NewTestController("http", t1) 19 | err := setup(c) 20 | 21 | if err != nil { 22 | t.Errorf("Expected no errors, but got: %v", err) 23 | } 24 | 25 | mids := httpserver.GetConfig(c).Middleware() 26 | if len(mids) == 0 { 27 | t.Fatal("Expected middleware, got 0 instead") 28 | } 29 | 30 | handler := mids[0](httpserver.EmptyNext) 31 | myHandler, ok := handler.(BotUA) 32 | if !ok { 33 | t.Fatalf("Expected handler to be type BotUA, got: %#v", handler) 34 | } 35 | 36 | if !httpserver.SameNext(myHandler.Next, httpserver.EmptyNext) { 37 | t.Error("'Next' field of handler was not set properly") 38 | } 39 | tests := []struct { 40 | input string 41 | shouldErr bool 42 | }{ 43 | // Bomb exists so plugin initiates correctly 44 | {`nobots "nobots.go" { "Googlebot" }`, false}, 45 | // Bomb exists and regexp keyword is valid 46 | {`nobots "nobots.go" { regexp "Googlebot" }`, false}, 47 | /* Bomb exists and regexp keyword is not valid even though 48 | Nobots take it as UA */ 49 | {`nobots "nobots.go" { regex "Googlebot" }`, false}, 50 | // Bomb exists and regexp valid 51 | {`nobots "nobots.go" { regexp "^Googlebot$" }`, false}, 52 | // Bomb exists and regexp not valid 53 | {`nobots "nobots.go" { regexp "(?Pre" }`, true}, 54 | } 55 | 56 | for i, test := range tests { 57 | _, err := parseUA(caddy.NewTestController("http", test.input)) 58 | if test.shouldErr && err == nil { 59 | t.Errorf("Test %v: Expected error but found nil", i) 60 | } else if !test.shouldErr && err != nil { 61 | t.Errorf("Test %v: Expected no error but found error: %v", i, err) 
62 | } 63 | } 64 | } 65 | 66 | var t2 = `nobots "no_exist.go" { "Googlebot" }` 67 | 68 | // Bomb does not exist so the plugin must throw an error 69 | func TestSetup1(t *testing.T) { 70 | c := caddy.NewTestController("http", t2) 71 | err := setup(c) 72 | 73 | if err == nil { 74 | t.Errorf("Expected error but found nil") 75 | } 76 | } 77 | 78 | func TestNobotsWithPublic(t *testing.T) { 79 | funcName := "TestNobotsWithPublic" 80 | myHandler := func(w http.ResponseWriter, r *http.Request) (int, error) { 81 | 82 | return http.StatusOK, nil 83 | } 84 | 85 | filename := "nobots.go" 86 | 87 | rws := []BotUA{ 88 | { 89 | Next: httpserver.HandlerFunc(myHandler), 90 | UA: &botUA{ 91 | bomb: filename, 92 | uas: []string{"Bot"}, 93 | re: nil, 94 | public: []*regexp.Regexp{regexp.MustCompile("/public")}, 95 | }, 96 | }, { 97 | Next: httpserver.HandlerFunc(myHandler), 98 | UA: &botUA{ 99 | bomb: filename, 100 | uas: nil, 101 | re: []*regexp.Regexp{regexp.MustCompile("^Bot")}, 102 | public: []*regexp.Regexp{regexp.MustCompile("/public")}, 103 | }, 104 | }, { 105 | Next: httpserver.HandlerFunc(myHandler), 106 | UA: &botUA{ 107 | bomb: filename, 108 | uas: nil, 109 | re: nil, 110 | public: nil, 111 | }, 112 | }, 113 | } 114 | 115 | file, err := ioutil.ReadFile(filename) 116 | if err != nil { 117 | 118 | } 119 | 120 | fileSize := strconv.Itoa(len(file)) 121 | 122 | type headerType struct { 123 | type_ string 124 | encoding string 125 | length string 126 | } 127 | 128 | type testType struct { 129 | path string 130 | ua string 131 | result int 132 | header headerType 133 | } 134 | 135 | tests := []testType{ 136 | { 137 | path: "/private", 138 | ua: "Bot", 139 | result: http.StatusOK, 140 | header: headerType{ 141 | type_: "text/html; charset=UTF-8", 142 | encoding: "gzip", 143 | length: fileSize, 144 | }, 145 | }, { 146 | path: "/this/is/public", 147 | ua: "Bot", 148 | result: http.StatusOK, 149 | header: headerType{ 150 | type_: "", 151 | encoding: "", 152 | length: "", 153 | }, 154 
| }, { 155 | path: "/public", 156 | ua: "Bot", 157 | result: http.StatusOK, 158 | header: headerType{ 159 | type_: "", 160 | encoding: "", 161 | length: "", 162 | }, 163 | }, { 164 | path: "/private", 165 | ua: "Got", 166 | result: http.StatusOK, 167 | header: headerType{ 168 | type_: "", 169 | encoding: "", 170 | length: "", 171 | }, 172 | }, { 173 | path: "/public", 174 | ua: "Got", 175 | result: http.StatusOK, 176 | header: headerType{ 177 | type_: "", 178 | encoding: "", 179 | length: "", 180 | }, 181 | }, { 182 | path: "/private", 183 | ua: "", 184 | result: http.StatusOK, 185 | header: headerType{ 186 | type_: "", 187 | encoding: "", 188 | length: "", 189 | }, 190 | }, 191 | } 192 | 193 | for i, rw := range rws { 194 | for j, test := range tests { 195 | req, err := http.NewRequest("GET", test.path, nil) 196 | if err != nil { 197 | t.Fatalf("Test %d: Could not create HTTP request: %v", j, err) 198 | } 199 | 200 | req.Header.Set("User-Agent", test.ua) 201 | rec := httptest.NewRecorder() 202 | result, err := rw.ServeHTTP(rec, req) 203 | 204 | if err != nil { 205 | t.Fatalf("Test %d: Could not ServeHTTP: %v", j, err) 206 | } 207 | 208 | if result != test.result { 209 | t.Errorf("Test %d: Expected status code %d but was %d", 210 | j, test.result, result) 211 | } 212 | 213 | if len(rw.UA.uas) > 0 || len(rw.UA.re) > 0 { 214 | if rec.HeaderMap.Get("Content-Type") != test.header.type_ { 215 | t.Errorf("Test %d-%d (%s): Expected Content-Type '%s' but found '%s'", 216 | i, j, funcName, test.header.type_, rec.HeaderMap.Get("Content-Type")) 217 | } 218 | if rec.HeaderMap.Get("Content-Encoding") != test.header.encoding { 219 | t.Errorf("Test %d-%d (%s): Expected Content-Encoding '%s' but found '%s'", 220 | i, j, funcName, test.header.encoding, rec.HeaderMap.Get("Content-Encoding")) 221 | } 222 | if rec.HeaderMap.Get("Content-Length") != test.header.length { 223 | t.Errorf("Test %d-%d (%s): Expected Content-Length '%s' but found '%s'", 224 | i, j, funcName, 
test.header.length, rec.HeaderMap.Get("Content-Length")) 225 | } 226 | } 227 | } 228 | } 229 | 230 | } 231 | 232 | func TestNobots(t *testing.T) { 233 | funcName := "TestNobots" 234 | myHandler := func(w http.ResponseWriter, r *http.Request) (int, error) { 235 | 236 | return http.StatusOK, nil 237 | } 238 | 239 | filename := "nobots.go" 240 | 241 | rws := []BotUA{ 242 | { 243 | Next: httpserver.HandlerFunc(myHandler), 244 | UA: &botUA{ 245 | bomb: filename, 246 | uas: []string{"Bot"}, 247 | re: nil, 248 | public: nil, 249 | }, 250 | }, { 251 | Next: httpserver.HandlerFunc(myHandler), 252 | UA: &botUA{ 253 | bomb: filename, 254 | uas: nil, 255 | re: []*regexp.Regexp{regexp.MustCompile("^Bot")}, 256 | public: nil, 257 | }, 258 | }, { 259 | Next: httpserver.HandlerFunc(myHandler), 260 | UA: &botUA{ 261 | bomb: filename, 262 | uas: nil, 263 | re: nil, 264 | public: nil, 265 | }, 266 | }, 267 | } 268 | 269 | file, err := ioutil.ReadFile(filename) 270 | if err != nil { 271 | 272 | } 273 | 274 | fileSize := strconv.Itoa(len(file)) 275 | 276 | type headerType struct { 277 | type_ string 278 | encoding string 279 | length string 280 | } 281 | 282 | type testType struct { 283 | path string 284 | ua string 285 | result int 286 | header headerType 287 | } 288 | 289 | tests := []testType{ 290 | { 291 | path: "/private", 292 | ua: "Bot", 293 | result: http.StatusOK, 294 | header: headerType{ 295 | type_: "text/html; charset=UTF-8", 296 | encoding: "gzip", 297 | length: fileSize, 298 | }, 299 | }, { 300 | path: "/public", 301 | ua: "Bot", 302 | result: http.StatusOK, 303 | header: headerType{ 304 | type_: "text/html; charset=UTF-8", 305 | encoding: "gzip", 306 | length: fileSize, 307 | }, 308 | }, { 309 | path: "/private", 310 | ua: "Got", 311 | result: http.StatusOK, 312 | header: headerType{ 313 | type_: "", 314 | encoding: "", 315 | length: "", 316 | }, 317 | }, { 318 | path: "/public", 319 | ua: "Got", 320 | result: http.StatusOK, 321 | header: headerType{ 322 | type_: "", 
323 | encoding: "", 324 | length: "", 325 | }, 326 | }, { 327 | path: "/private", 328 | ua: "", 329 | result: http.StatusOK, 330 | header: headerType{ 331 | type_: "", 332 | encoding: "", 333 | length: "", 334 | }, 335 | }, 336 | } 337 | 338 | for i, rw := range rws { 339 | for j, test := range tests { 340 | req, err := http.NewRequest("GET", test.path, nil) 341 | if err != nil { 342 | t.Fatalf("Test %d: Could not create HTTP request: %v", j, err) 343 | } 344 | 345 | req.Header.Set("User-Agent", test.ua) 346 | rec := httptest.NewRecorder() 347 | result, err := rw.ServeHTTP(rec, req) 348 | 349 | if err != nil { 350 | t.Fatalf("Test %d: Could not ServeHTTP: %v", j, err) 351 | } 352 | 353 | if result != test.result { 354 | t.Errorf("Test %d: Expected status code %d but was %d", 355 | j, test.result, result) 356 | } 357 | 358 | if len(rw.UA.uas) > 0 || len(rw.UA.re) > 0 { 359 | if rec.HeaderMap.Get("Content-Type") != test.header.type_ { 360 | t.Errorf("Test %d-%d (%s): Expected Content-Type '%s' but found '%s'", 361 | i, j, funcName, test.header.type_, rec.HeaderMap.Get("Content-Type")) 362 | } 363 | if rec.HeaderMap.Get("Content-Encoding") != test.header.encoding { 364 | t.Errorf("Test %d-%d (%s): Expected Content-Encoding '%s' but found '%s'", 365 | i, j, funcName, test.header.encoding, rec.HeaderMap.Get("Content-Encoding")) 366 | } 367 | if rec.HeaderMap.Get("Content-Length") != test.header.length { 368 | t.Errorf("Test %d-%d (%s): Expected Content-Length '%s' but found '%s'", 369 | i, j, funcName, test.header.length, rec.HeaderMap.Get("Content-Length")) 370 | } 371 | } 372 | } 373 | } 374 | 375 | } 376 | --------------------------------------------------------------------------------