├── .gitignore ├── test ├── mocha.opts ├── .eslintrc ├── fixtures │ └── example │ │ └── index.html ├── server.js └── index.js ├── circle.yml ├── History.md ├── Makefile ├── package.json ├── .eslintrc ├── lib └── index.js └── Readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .DS_Store -------------------------------------------------------------------------------- /test/mocha.opts: -------------------------------------------------------------------------------- 1 | --slow 1s 2 | --timeout 5s 3 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | node: 3 | version: 0.11.13 -------------------------------------------------------------------------------- /test/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "mocha": true 4 | }, 5 | "rules": { 6 | "camelcase": 0, 7 | "dot-notation": [ 2, { "allowPattern": "^[A-Z]" } ], 8 | "new-cap": 0 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /test/fixtures/example/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Example 5 | 6 | 7 |

Hello World

8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /test/server.js: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Module dependencies. 4 | */ 5 | 6 | var http = require('http'); 7 | var path = require('path'); 8 | var serve = require('serve-static'); 9 | 10 | /** 11 | * Single export. 12 | */ 13 | 14 | module.exports = http.createServer(serve(path.join(__dirname, 'fixtures'))); 15 | -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | 2 | 0.1.3 / 2015-04-01 3 | ================== 4 | 5 | * update readme and manifest 6 | * updating readme to point to sherlock-segment 7 | 8 | 0.1.2 / 2015-03-25 9 | ================== 10 | 11 | * specifying public registry 12 | 13 | 0.1.1 / 2015-03-24 14 | ================== 15 | 16 | * slight rename to get onto npm 17 | * preparing to open-source 18 | 19 | 0.1.0 / 2015-03-24 20 | ================== 21 | 22 | * removing segment integrations, implementing plugin architecture 23 | 24 | 0.0.1 / 2015-02-28 25 | ================== 26 | 27 | * initial release 28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # 3 | # Environment. 4 | # 5 | 6 | NODE ?= node 7 | 8 | # 9 | # Binaries. 10 | # 11 | 12 | BIN := ./node_modules/.bin 13 | MOCHA := $(BIN)/mocha 14 | ESLINT := $(BIN)/eslint 15 | 16 | # 17 | # Test. 18 | # 19 | 20 | test: | node_modules 21 | @$(MOCHA) 22 | 23 | # 24 | # Lint. 25 | # 26 | 27 | lint: | node_modules 28 | @$(ESLINT) . 29 | 30 | # 31 | # Node Modules. 32 | # 33 | 34 | node_modules: package.json 35 | @npm install 36 | 37 | # 38 | # Clean. 39 | # 40 | 41 | distclean: 42 | @rm -rf node_modules 43 | 44 | # 45 | # Phonies. 
46 | # 47 | 48 | .PHONY: test 49 | .PHONY: lint 50 | .PHONY: distclean 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sherlock-inspector", 3 | "version": "0.1.3", 4 | "main": "lib/index.js", 5 | "description": "Detect which services a website is using and find their API keys.", 6 | "keywords": [ 7 | "sherlock", 8 | "inspector", 9 | "detect", 10 | "services", 11 | "plugins" 12 | ], 13 | "repository": "segmentio/sherlock", 14 | "license": "MIT", 15 | "dependencies": { 16 | "debug": "^2.1.3", 17 | "nightmare": "^1.8.0", 18 | "normalize-url": "^1.2.0" 19 | }, 20 | "devDependencies": { 21 | "eslint": "^0.17.1", 22 | "mocha": "^2.2.1", 23 | "serve-static": "^1.9.2" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "node": true 5 | }, 6 | "rules": { 7 | "curly": 0, 8 | "eol-last": [ 2, "always" ], 9 | "no-shadow": 0, 10 | "no-trailing-spaces": 2, 11 | "no-underscore-dangle": 0, 12 | "no-use-before-define": 0, 13 | "quotes": [ 2, "single" ], 14 | "space-after-function-name": [ 2, "never" ], 15 | "space-after-keywords": [ 2, "always" ], 16 | "space-before-blocks": [ 2, "always" ], 17 | "space-in-brackets": [ 2, "always", { "propertyName": false } ], 18 | "space-in-parens": [ 2, "never" ], 19 | "space-return-throw-case": [ 2, "always" ], 20 | "strict": [ 2, "never" ] 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /lib/index.js: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Dependencies. 
4 | */ 5 | 6 | var debug = require('debug')('sherlock'); 7 | var normalize = require('normalize-url'); 8 | var Nightmare = require('nightmare'); 9 | 10 | /** 11 | * Expose `Sherlock`. 12 | */ 13 | 14 | module.exports = Sherlock; 15 | 16 | /** 17 | * Sherlock. 18 | */ 19 | 20 | function Sherlock() { 21 | if (!(this instanceof Sherlock)) return new Sherlock(); 22 | this.services = []; 23 | } 24 | 25 | /** 26 | * Analyze a `url`. 27 | * 28 | * @param {String} url 29 | * @param {Function} callback 30 | */ 31 | 32 | Sherlock.prototype.analyze = function(url, callback) { 33 | debug('starting: %s', url); 34 | 35 | var nightmare = new Nightmare({ loadImages: false }); 36 | var services = this.services; 37 | var results = {}; 38 | var attempts = {}; 39 | 40 | nightmare 41 | .on('error', function (msg) { 42 | debug('error: %s', msg); 43 | }) 44 | .goto(normalize(url)) 45 | .evaluate(scripts, detect) 46 | .run(function (err) { 47 | callback(err, results); 48 | }); 49 | 50 | function detect(tags) { 51 | debug('scripts: %d', tags.length); 52 | 53 | var matches = match(services, tags); 54 | debug('matches: %s', Object.keys(matches).join(', ')); 55 | 56 | matches.forEach(function (service) { 57 | var name = service.name; 58 | var extract = service.settings; 59 | 60 | if (!extract) { 61 | results[name] = true; 62 | return; 63 | } 64 | 65 | attempts[name] = 0; 66 | nightmare.evaluate(extract, update); 67 | 68 | function update(settings) { 69 | if (settings) { 70 | debug('settings: %s', name, settings); 71 | results[name] = settings; 72 | } else if (++attempts[name] <= 5) { 73 | debug('retrying: %s', name); 74 | nightmare.wait(500).evaluate(extract, update); 75 | } else { 76 | debug('no settings: %s', name); 77 | results[name] = {}; 78 | } 79 | } 80 | }); 81 | } 82 | }; 83 | 84 | /** 85 | * Use a plugin. 
86 | * 87 | * @param {Object} plugin 88 | * @returns {Sherlock} 89 | */ 90 | 91 | Sherlock.prototype.use = function (plugin) { 92 | if (Array.isArray(plugin)) { 93 | plugin.forEach(this.use, this); 94 | } else { 95 | this.services.push(plugin); 96 | } 97 | 98 | return this; 99 | }; 100 | 101 | /** 102 | * Get all matching `tags`. 103 | * 104 | * @param {Array:Object} services 105 | * @param {Array:String} scripts 106 | * @return {Array:Object} 107 | */ 108 | 109 | function match(services, scripts) { 110 | return services.filter(function (service) { 111 | if (!service.script) return false; 112 | return scripts.some(function (src) { 113 | if (typeof service.script === 'string') { 114 | return service.script === src; 115 | } else if (service.script instanceof RegExp) { 116 | return service.script.test(src); 117 | } else if (typeof service.script === 'function') { 118 | return !!service.script(src); 119 | } else { 120 | return false; 121 | } 122 | }); 123 | }); 124 | } 125 | 126 | /** 127 | * Get all of the script tags on a page. 128 | * 129 | * @return {Array:String} 130 | */ 131 | 132 | function scripts() { 133 | return [].slice.call(document.getElementsByTagName('script')) 134 | .filter(function (js) { 135 | var src = js.src || ''; 136 | var absolute = src.indexOf('//') === 0 || src.indexOf('http') === 0; 137 | return src || absolute; 138 | }) 139 | .map(function (js) { 140 | return js.src; 141 | }); 142 | } 143 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | [![Circle CI](https://circleci.com/gh/segmentio/sherlock.svg?style=svg&circle-token=549661cd6c45d67690129d9737a0402f31cb1657)](https://circleci.com/gh/segmentio/sherlock) 2 | 3 | # Sherlock 4 | 5 | > Used to scrape a web page to detect what 3rd-party services are being used. 
6 | > Check out [sherlock-segment](https://github.com/segmentio/sherlock-segment) 7 | > for a collection of plugin examples. 8 | 9 | ## Example 10 | 11 | ```js 12 | var sherlock = require('sherlock-inspector'); 13 | 14 | // add plugins to support various services 15 | var segment = require('sherlock-segment'); 16 | 17 | // even custom ones you define yourself! 18 | var myTracker = { 19 | name: 'myTracker', 20 | script: /http(s)?:\/\/(.)\.myTracker\.com/, 21 | settings: function () { 22 | return window.myTracker.id; 23 | } 24 | }; 25 | 26 | sherlock() 27 | .use(segment) 28 | .use(myTracker) 29 | .analyze('segment.com', function (err, results) { 30 | console.log(results); // e.g. { myTracker: 'abc123' } 31 | }); 32 | ``` 33 | 34 | 35 | ## How It Works 36 | 37 | Sherlock performs the following steps during its analysis: 38 | 39 | 1. Opens a `url` using [Nightmare](http://www.nightmarejs.org/) 40 | 2. Iterates through configured services looking for `