├── .gitignore
├── README.md
├── index.js
├── package.json
└── test
    ├── fixtures
    │   └── robots.txt
    └── index.js

/.gitignore:
--------------------------------------------------------------------------------
node_modules
*.log
.idea
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# express-robots

Express middleware for generating a robots.txt or responding with an existing file. A full mounting sketch appears at the end of this README.

## Using a file

```javascript
app.use(robots(__dirname + '/robots.txt'));
```

## Using an object

### Basic object

```javascript
app.use(robots({UserAgent: '*', Disallow: '/'}))
```

#### Will produce:

```
User-agent: *
Disallow: /
```

### Crawl Delay

You can optionally pass a CrawlDelay, just like Disallow:

```javascript
app.use(robots({UserAgent: '*', Disallow: '/', CrawlDelay: '5'}))
```

#### Will produce:

```
User-agent: *
Crawl-delay: 5
Disallow: /
```

### Or an array of objects

```javascript
app.use(robots([
  {
    UserAgent: 'Googlebot',
    Disallow: '/no-google'
  },
  {
    UserAgent: 'Bingbot',
    Disallow: '/no-bing'
  }
]));
```

#### Will produce:

```
User-agent: Googlebot
Disallow: /no-google
User-agent: Bingbot
Disallow: /no-bing
```

### Or either property (UserAgent | Disallow) as an array

```javascript
app.use(robots([
  {
    UserAgent: [ 'Googlebot', 'Slurp' ],
    Disallow: [ '/no-google', '/no-yahoo' ]
  },
  {
    UserAgent: '*',
    Disallow: [ '/no-bots', '/still-no-bots' ]
  }
]));
```

#### Will produce:

```
User-agent: Googlebot
User-agent: Slurp
Disallow: /no-google
Disallow: /no-yahoo
User-agent: *
Disallow: /no-bots
Disallow: /still-no-bots
```
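
## Full example

A minimal sketch of mounting the middleware in an app. It assumes the package is installed under its published name (`express-robots`); the port and paths are purely illustrative.

```javascript
var express = require('express');
var robots = require('express-robots');

var app = express();

// Either serve an existing file...
// app.use(robots(__dirname + '/robots.txt'));

// ...or describe the rules inline and let the middleware render them.
app.use(robots({UserAgent: '*', Disallow: '/private'}));

app.listen(3000);
```

The middleware itself is a small Express app that answers `GET /robots.txt`, so `app.use` simply mounts it alongside your other routes.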
--------------------------------------------------------------------------------

/index.js:
--------------------------------------------------------------------------------
var fs = require('fs');
var asArray = require('as-array');

// Polyfill Array.isArray for very old runtimes.
if (!Array.isArray) {
  Array.isArray = function(arg) {
    return Object.prototype.toString.call(arg) === '[object Array]';
  };
}

module.exports = function(robots) {
  var app = require('express')();

  if (robots) {
    // A string is treated as a path to an existing robots.txt file;
    // anything else is rendered into robots.txt directives.
    robots = 'string' === typeof robots
      ? fs.readFileSync(robots, 'utf8')
      : render(robots);
  } else {
    robots = '';
  }

  app.get('/robots.txt', function(req, res) {
    res.header('Content-Type', 'text/plain');
    res.send(robots);
  });

  return app;
};

// Turns an object, or an array of objects, into robots.txt text.
function render(robots) {
  return asArray(robots).map(function(robot) {
    var userAgentArray = [];
    if (Array.isArray(robot.UserAgent)) {
      userAgentArray = robot.UserAgent.map(function(userAgent) {
        return 'User-agent: ' + userAgent;
      });
    } else {
      userAgentArray.push('User-agent: ' + robot.UserAgent);
    }
    if (robot.CrawlDelay) {
      userAgentArray.push('Crawl-delay: ' + robot.CrawlDelay);
    }
    return userAgentArray.concat(asArray(robot.Disallow).map(function(disallow) {
      if (Array.isArray(disallow)) {
        return disallow.map(function(line) {
          return 'Disallow: ' + line;
        }).join('\n');
      }
      return 'Disallow: ' + disallow;
    })).join('\n');
  }).join('\n');
}
--------------------------------------------------------------------------------

/package.json:
--------------------------------------------------------------------------------
{
  "name": "express-robots",
  "version": "0.1.6",
  "description": "Express middleware for generating your robots.txt",
  "main": "index.js",
  "directories": {
    "test": "test"
  },
  "scripts": {
    "test": "mocha"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/weo-edu/express-robots"
  },
  "keywords": [
    "express",
    "robots",
    "robots.txt",
    "robots"
  ],
  "author": "ashaffer (http://github.com/ashaffer)",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/weo-edu/express-robots/issues"
  },
  "homepage": "https://github.com/weo-edu/express-robots",
  "dependencies": {
    "as-array": "git://github.com/weo-edu/as-array"
  },
  "devDependencies": {
    "chai": "^2.1.0",
    "express": "^4.12.1",
    "mocha": "^2.1.0",
    "supertest": "^0.15.0"
  }
}
--------------------------------------------------------------------------------

/test/fixtures/robots.txt:
--------------------------------------------------------------------------------
User-agent: Googlebot
Disallow: /nogoogle
--------------------------------------------------------------------------------

/test/index.js:
--------------------------------------------------------------------------------
var fs = require('fs');
var expect = require('chai').expect;
var supertest = require('supertest');
var robots = require('../');

describe('express-robots', function() {
  it('should work', function(done) {
    var request = supertest(robots({UserAgent: '*', Disallow: '/'}));
    request
      .get('/robots.txt')
      .end(function(err, res) {
        expect(res.status).to.equal(200);
        expect(res.headers['content-type']).to.equal('text/plain; charset=utf-8');
        expect(res.text).to.equal('User-agent: *\nDisallow: /');
        done();
      });
  });

  it('should work with a crawl delay', function(done) {
    var request = supertest(robots({UserAgent: '*', CrawlDelay: '5'}));
    request
      .get('/robots.txt')
      .end(function(err, res) {
        expect(res.status).to.equal(200);
        expect(res.headers['content-type']).to.equal('text/plain; charset=utf-8');
        expect(res.text).to.equal('User-agent: *\nCrawl-delay: 5');
        done();
      });
  });
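
  // Illustrative sketch, not part of the original suite: exercises the array
  // form documented in the README. The expected text assumes render() emits
  // all User-agent lines before the Disallow lines, as index.js does.
  it('should work with arrays of user agents and disallows', function(done) {
    var request = supertest(robots({
      UserAgent: ['Googlebot', 'Slurp'],
      Disallow: ['/no-google', '/no-yahoo']
    }));
    request
      .get('/robots.txt')
      .end(function(err, res) {
        expect(res.status).to.equal(200);
        expect(res.text).to.equal(
          'User-agent: Googlebot\nUser-agent: Slurp\nDisallow: /no-google\nDisallow: /no-yahoo'
        );
        done();
      });
  });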

  it('should work with multiple crawl delays', function(done) {
    var request = supertest(robots([
      {UserAgent: '*', CrawlDelay: '5'},
      {UserAgent: 'Foo', CrawlDelay: '10'}
    ]));
    request
      .get('/robots.txt')
      .end(function(err, res) {
        expect(res.status).to.equal(200);
        expect(res.headers['content-type']).to.equal('text/plain; charset=utf-8');
        expect(res.text).to.equal('User-agent: *\nCrawl-delay: 5\nUser-agent: Foo\nCrawl-delay: 10');
        done();
      });
  });

  it('should work with files', function(done) {
    var request = supertest(robots(__dirname + '/fixtures/robots.txt'));
    request
      .get('/robots.txt')
      .end(function(err, res) {
        expect(res.status).to.equal(200);
        expect(res.text).to.equal(fs.readFileSync(__dirname + '/fixtures/robots.txt', 'utf8'));
        done();
      });
  });

  it('should respond with an empty file if nothing is specified', function(done) {
    var request = supertest(robots());
    request
      .get('/robots.txt')
      .end(function(err, res) {
        expect(res.status).to.equal(200);
        expect(res.text).to.equal('');
        done();
      });
  });

});
--------------------------------------------------------------------------------