├── .bowerrc ├── .gitattributes ├── .gitignore ├── LICENSE.txt ├── README.md ├── bower.json ├── config ├── config-dev.json ├── config-prod.json ├── config.json └── index.js ├── data-access ├── site-repository-models.js └── site-repository.js ├── gulpfile.js ├── infrastructure ├── accessLogger.js └── logger.js ├── models └── site-models.js ├── package.json ├── pm2-apps.json ├── scripts └── update-site-mapper.sh ├── third_party_libs └── mongodb-queue-fixed │ ├── .gitignore │ ├── .travis.yml │ ├── README.md │ ├── mongodb-queue.js │ ├── package.json │ └── test │ ├── clean.js │ ├── dead-queue.js │ ├── default.js │ ├── delay.js │ ├── indexes.js │ ├── multi.js │ ├── ping.js │ ├── setup.js │ ├── stats.js │ └── visibility.js ├── web ├── app.js ├── binding-models │ └── binding-models.js ├── index.js ├── public │ ├── css │ │ ├── non-responsive.css │ │ └── site.css │ ├── images │ │ ├── map_alentum.png │ │ ├── map_weblogexpert.png │ │ └── progress.gif │ ├── js │ │ ├── main.js │ │ └── map.js │ ├── libs │ │ └── tipsy │ │ │ ├── jquery.tipsy.js │ │ │ └── tipsy.css │ ├── robots.txt │ ├── wle_tracker.gif │ └── wle_tracker.js ├── routes │ ├── map.js │ └── root.js └── views │ ├── errors │ ├── 404.html │ └── 500.html │ ├── layouts │ ├── layout.html │ └── usual-layout.html │ ├── map │ └── index.html │ └── root │ ├── about.html │ ├── crawler.html │ └── index.html └── worker ├── index.js ├── mapping-client.js ├── mapping-engine.js ├── site-crawler ├── site-crawler.js └── test │ ├── crawler-test.js │ └── promise-test.js └── utils └── objectExtensions.js /.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory": "web/public/components/" 3 | } -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs 
diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # dist folder for web 2 | web/public_dist 3 | web/public_dist_temp 4 | 5 | # WebStorm files 6 | .idea/workspace.xml 7 | 8 | # Logs 9 | logs 10 | *.log 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | 17 | # Directory for instrumented libs generated by jscoverage/JSCover 18 | lib-cov 19 | 20 | # Coverage directory used by tools like istanbul 21 | coverage 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # node-waf configuration 27 | .lock-wscript 28 | 29 | # Compiled binary addons (http://nodejs.org/api/addons.html) 30 | build/Release 31 | 32 | # Dependency directory 33 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 34 | # node_modules 35 | 36 | # Debug log from npm 37 | npm-debug.log 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visual Site Mapper - Node.js version 2 | This repository contains the source code of Visual Site Mapper, Node.js version. 3 | 4 | Visual Site Mapper is a free service that can quickly show a map of a site. The service is available at http://www.visualsitemapper.com. 5 | 6 | The application uses Node.js, PM2 as process manager and MongoDB for data storage. 
7 | 8 | # License 9 | Affero GPL v3 10 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "site-mapper", 3 | "version": "0.0.0", 4 | "authors": [ 5 | "Alentum Software Ltd." 6 | ], 7 | "description": "", 8 | "main": "", 9 | "moduleType": [], 10 | "license": "AGPLv3", 11 | "homepage": "", 12 | "private": true, 13 | "ignore": [ 14 | "**/.*", 15 | "node_modules", 16 | "bower_components", 17 | "test", 18 | "tests" 19 | ], 20 | "dependencies": { 21 | "jquery": "~2.1.4", 22 | "bootstrap": "^3.3.5", 23 | "js-cookie": "~2.0.3" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /config/config-dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "web": { 3 | "port": 3000, 4 | "startWorker": true 5 | } 6 | } -------------------------------------------------------------------------------- /config/config-prod.json: -------------------------------------------------------------------------------- 1 | { 2 | "exceptionLogPath": "../logs/site-mapper/exception.log", 3 | "web": { 4 | "port": 8080, 5 | "startWorker": false, 6 | "logPath": "../logs/site-mapper/web/app.log", 7 | "accessLogPath": "../logs/site-mapper/web/access/" 8 | }, 9 | "worker": { 10 | "logPath": "../logs/site-mapper/worker/app.log" 11 | } 12 | } -------------------------------------------------------------------------------- /config/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "siteDatabase": { 3 | "uri": "mongodb://localhost/sitemapper", 4 | "options": { 5 | "server": { 6 | "socketOptions": { 7 | "keepAlive": 1 8 | } 9 | } 10 | } 11 | }, 12 | "exceptionLogPath": "logs/exception.log", 13 | "web": { 14 | "port": 3000, 15 | "startWorker": false, 16 | "logPath": "logs/web/app.log", 17 | "accessLogPath": "logs/web/access/", 
18 | "assetsVersion": 1, 19 | "bundles": { 20 | "js": { 21 | "default": [ 22 | "wle_tracker.js", 23 | "components/jquery/dist/jquery.js", 24 | "components/bootstrap/dist/js/bootstrap.js", 25 | "components/js-cookie/src/js.cookie.js", 26 | "js/main.js" 27 | ], 28 | "map": [ 29 | "components/d3/d3.min.js", 30 | "libs/tipsy/jquery.tipsy.js", 31 | "components/seedrandom/seedrandom.js", 32 | "js/map.js" 33 | ] 34 | }, 35 | "css": { 36 | "default": [ 37 | "components/bootstrap/dist/css/bootstrap.css", 38 | "css/non-responsive.css", 39 | "css/site.css" 40 | ], 41 | "map": [ 42 | "libs/tipsy/tipsy.css" 43 | ] 44 | } 45 | } 46 | }, 47 | "worker": { 48 | "logPath": "logs/worker/app.log" 49 | }, 50 | "mapper": { 51 | "refreshPeriodInDays": 7, 52 | "maxNodesToShow": 200 53 | } 54 | } -------------------------------------------------------------------------------- /config/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var nconf = require('nconf'); 4 | var path = require('path'); 5 | 6 | nconf.argv() 7 | .env(); 8 | 9 | if (nconf.get('NODE_ENV') == 'production') { 10 | nconf.file('production', { file: path.join(__dirname, 'config-prod.json') }); 11 | } 12 | else { 13 | nconf.file('development', { file: path.join(__dirname, 'config-dev.json') }); 14 | } 15 | 16 | nconf.file({ file: path.join(__dirname, 'config.json') }); 17 | 18 | module.exports = nconf; -------------------------------------------------------------------------------- /data-access/site-repository-models.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var mongoose = require('mongoose'), 4 | Schema = mongoose.Schema; 5 | 6 | // Schemas 7 | var pageSchema = new Schema({ 8 | id: Number, 9 | url: String, 10 | title: String, 11 | distanceFromRoot: Number, 12 | httpStatus: Number, 13 | status: Number, 14 | linksTo: [Number] 15 | }, { 16 | _id: false 17 | }); 18 | 19 | var siteInfoSchema = 
/**
 * MongoDB-backed store for site maps and for the queue of domains that are
 * waiting to be processed.
 *
 * Calling the constructor connects mongoose using the `siteDatabase` settings
 * and starts preparing the `sitequeue` queue. Every public method returns a
 * promise. Validation failures reject with a plain string ('Invalid domain',
 * 'info is null', 'contents is null').
 */
function SiteRepository() {
    // Number of queue additions since the last cleanup of processed messages.
    var queueAddCount = 0;

    mongoose.connect(config.get('siteDatabase:uri'), config.get('siteDatabase:options'));

    // Resolves with the promisified queue once the connection is open, the
    // queue indexes exist and already-processed messages have been removed.
    var queuePromise = new Promise(function (resolve, reject) {
        mongoose.connection.on('open', function () {
            var queue = Promise.promisifyAll(mongoDbQueue(mongoose.connection.db, 'sitequeue'));

            queue.ensureIndexesAsync()
                .then(() => queue.cleanAsync())
                .then(() => resolve(queue))
                // Propagate set-up failures: without this handler queuePromise
                // would stay pending forever and every queue method would hang.
                .catch(reject);
        });

        mongoose.connection.on('error', function (err) {
            reject(err);
        });
    });

    /**
     * Closes the mongoose connection.
     */
    this.close = function () {
        return mongoose.connection.close();
    };

    /**
     * Stores site info under `_id = info.domain`.
     *
     * @param {Object} info - site info; `info.domain` must be a valid domain
     * @param {boolean} [overwrite=true] - update an existing document instead of
     *        calling save() on a new one
     * @param {boolean} [updateOnly=false] - when overwriting, do not upsert
     */
    function saveSiteInfo(info, overwrite, updateOnly) {
        if (overwrite === undefined) {
            overwrite = true;
        }

        if (updateOnly === undefined) {
            updateOnly = false;
        }

        if (info == null) {
            return Promise.reject('info is null');
        }

        if (!siteModels.SiteInfo.isValidDomain(info.domain)) {
            return Promise.reject('Invalid domain');
        }

        var siteInfo = new SiteInfo(info);
        siteInfo.set('_id', info.domain);
        if (overwrite) {
            return SiteInfo.findOneAndUpdate({ _id: info.domain }, siteInfo, { upsert: !updateOnly });
        }

        return siteInfo.save();
    }

    /**
     * Stores site contents under `_id = domain`; `overwrite` and `updateOnly`
     * behave as in saveSiteInfo.
     */
    function saveSiteContents(domain, contents, overwrite, updateOnly) {
        if (overwrite === undefined) {
            overwrite = true;
        }

        if (updateOnly === undefined) {
            updateOnly = false;
        }

        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        if (contents == null) {
            return Promise.reject('contents is null');
        }

        var siteContents = new SiteContents(contents);
        siteContents.set('_id', domain);
        if (overwrite) {
            return SiteContents.findOneAndUpdate({ _id: domain }, siteContents, { upsert: !updateOnly });
        }

        return siteContents.save();
    }

    /**
     * Loads the info document of a domain as a plain object with `domain` set
     * from the document id; resolves with null when nothing is stored.
     */
    function getSiteInfo(domain) {
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        return SiteInfo.findById(domain)
            .then(res => {
                if (!res) {
                    return null;
                }

                var info = res.toObject();
                info.domain = res.get('_id');
                return info;
            });
    }

    /**
     * Loads the contents document of a domain as a plain object, or null.
     */
    function getSiteContents(domain) {
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        return SiteContents.findById(domain)
            .then(res => res ? res.toObject() : null);
    }

    function deleteSiteInfo(domain) {
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        return SiteInfo.findByIdAndRemove(domain);
    }

    function deleteSiteContents(domain) {
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        return SiteContents.findByIdAndRemove(domain);
    }

    /**
     * Saves `site.info` and then `site.contents`.
     *
     * @param {Object} site - object with `info` and `contents`
     * @param {boolean} [overwrite=true] - see saveSiteInfo
     */
    this.saveSite = function (site, overwrite) {
        if (overwrite === undefined) {
            overwrite = true;
        }

        return saveSiteInfo(site.info, overwrite)
            .then(() => saveSiteContents(site.info.domain, site.contents, overwrite));
    };

    /**
     * Updates the info of an already stored site; never creates a document.
     */
    this.updateSiteInfo = function (siteInfo) {
        return saveSiteInfo(siteInfo, true, true);
    };

    /**
     * Loads a site. Resolves with null when no info is stored for the domain.
     *
     * Contents are fetched only when `includeContents` is truthy and either no
     * `contentsTimeStamp` is given, the stored info has no `statusTime`, or the
     * timestamp differs from `statusTime.getTime()`.
     */
    this.getSite = function (domain, includeContents, contentsTimeStamp) {
        var site = new siteModels.Site();
        return getSiteInfo(domain)
            .then(info => {
                site.info = info;

                // NOTE(review): the loose `!=` is kept from the original; presumably
                // the timestamp may arrive as a string - confirm against callers.
                if (info && includeContents && ((contentsTimeStamp == null) || (site.info.statusTime == null) ||
                    (contentsTimeStamp != site.info.statusTime.getTime()))) {
                    return getSiteContents(domain);
                }
            })
            .then(contents => {
                if (contents) {
                    site.contents = contents;
                }

                return site.info ? site : null;
            });
    };

    /**
     * Removes the info document and then the contents document of a domain.
     */
    this.removeSite = function (domain) {
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        return deleteSiteInfo(domain)
            .then(() => deleteSiteContents(domain));
    };

    /**
     * Resolves with true when info is stored for `domain`, false otherwise
     * (including when the domain is invalid).
     *
     * The original declared no parameter, so reading `domain` threw a
     * ReferenceError in strict mode and the lookup was made without a domain.
     */
    this.siteExists = function (domain) {
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.resolve(false);
        }

        return getSiteInfo(domain)
            .then(info => !!info);
    };

    /**
     * Adds a domain to the processing queue and resolves with the result of
     * the queue's add operation. After more than 1000 additions the counter is
     * reset and processed messages are cleaned from the queue.
     */
    this.queueSiteForProcessing = function (domain) {
        queueAddCount++;

        return queuePromise
            .then(queue => {
                return queue.addAsync(domain)
                    .then(res => {
                        if (queueAddCount > 1000) {
                            queueAddCount = 0;
                            return queue.cleanAsync()
                                .then(() => res);
                        }

                        return res;
                    });
            });
    };

    /**
     * Resolves with the size reported by the processing queue.
     */
    this.getProcessQueueSize = function () {
        return queuePromise
            .then(queue => queue.sizeAsync());
    };

    /**
     * Takes the next message from the queue, acknowledges it immediately and
     * resolves with its payload; resolves with null when no message is available.
     */
    this.getNextSiteForProcessing = function () {
        return queuePromise
            .then(queue => {
                return queue.getAsync()
                    .then(msg => {
                        if (msg && msg.id) {
                            return queue.ackAsync(msg.ack)
                                .then(() => msg.payload);
                        }

                        return null;
                    });
            });
    };
}
/**
 * Maps a bundle entry from the config to its location under web/public,
 * dropping any leading slashes from the configured path.
 */
function toPublicPath(entry) {
    return 'web/public/' + _.trimLeft(entry, '/');
}

// Minifies the files of every configured CSS bundle and concatenates them into
// web/public_dist_temp/css/<bundle name>.css.
gulp.task('build-styles', function () {
    var combined = merge();
    var cssBundles = config.get("web:bundles:css");

    Object.keys(cssBundles).forEach(function (bundleName) {
        var sources = cssBundles[bundleName].map(function (entry) {
            return toPublicPath(entry);
        });

        var bundleStream = gulp.src(sources)
            .pipe(minifycss())
            .pipe(concat(bundleName + '.css'))
            .pipe(gulp.dest('web/public_dist_temp/css'));

        combined.add(bundleStream);
    });

    return combined;
});

// Concatenates the files of every configured JS bundle, uglifies the result and
// writes it to web/public_dist_temp/js/<bundle name>.js.
gulp.task('build-scripts', function () {
    var combined = merge();
    var jsBundles = config.get("web:bundles:js");

    Object.keys(jsBundles).forEach(function (bundleName) {
        var sources = jsBundles[bundleName].map(function (entry) {
            return toPublicPath(entry);
        });

        var bundleStream = gulp.src(sources)
            .pipe(concat(bundleName + '.js'))
            .pipe(uglify())
            .pipe(gulp.dest('web/public_dist_temp/js'));

        combined.add(bundleStream);
    });

    return combined;
});

// Copies images plus top-level html/txt files into the staging directory.
gulp.task('copy-other-files', function () {
    var staticFiles = ['web/public/**/*.{gif,png,jpg,jpeg}', 'web/public/*.{html,txt}'];

    return gulp.src(staticFiles)
        .pipe(gulp.dest('web/public_dist_temp'));
});

// Deletes the staging directory.
gulp.task('clean-temp-dist', function () {
    return del(['web/public_dist_temp']);
});

// Deletes the current dist directory and renames the staging directory into
// its place; any failure is reported through the task callback.
gulp.task('move-temp-to-dist', function (done) {
    del('web/public_dist')
        .then(function () {
            fs.rename('web/public_dist_temp', 'web/public_dist', function (renameError) {
                if (renameError) {
                    done(renameError);
                    return;
                }

                done();
            });
        })
        .catch(function (deleteError) {
            done(deleteError);
        });
});

// Full build: clean staging, run the three build steps together, then publish.
gulp.task('build', function (done) {
    var buildSteps = ['build-scripts', 'build-styles', 'copy-other-files'];

    runSequence('clean-temp-dist', buildSteps, 'move-temp-to-dist', done);
});
require('file-stream-rotator'); 4 | var fs = require('fs'); 5 | var morgan = require('morgan'); 6 | var path = require('path'); 7 | var mkdirp = require('mkdirp'); 8 | var config = require('../config'); 9 | 10 | // setup the logger 11 | var logDirectory = path.join(__dirname, '..', config.get('web:accessLogPath')); 12 | 13 | // create directoy if doesn't exist 14 | mkdirp.sync(logDirectory); 15 | 16 | // create a rotating write stream 17 | var accessLogStream = FileStreamRotator.getStream({ 18 | filename: logDirectory + '/access-%DATE%.log', 19 | frequency: 'daily', 20 | verbose: false, 21 | date_format: "YYYYMMDD" 22 | }); 23 | 24 | module.exports = morgan('combined', {stream: accessLogStream}) -------------------------------------------------------------------------------- /infrastructure/logger.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var winston = require('winston'); 4 | var path = require('path'); 5 | var mkdirp = require('mkdirp'); 6 | var config = require('../config'); 7 | 8 | winston.handleExceptions(new winston.transports.File({ 9 | filename: path.join(__dirname, '..', config.get('exceptionLogPath')) 10 | }), new winston.transports.Console({ 11 | humanReadableUnhandledException: true 12 | })); 13 | 14 | var webLogPath = path.join(__dirname, '..', config.get('web:logPath')); 15 | mkdirp.sync(path.dirname(webLogPath)); 16 | 17 | winston.loggers.add('web', { 18 | console: { 19 | colorize: true, 20 | label: 'web' 21 | }, 22 | file: { 23 | filename: webLogPath, 24 | json: false 25 | } 26 | }); 27 | 28 | var workerLogPath = path.join(__dirname, '..', config.get('worker:logPath')); 29 | mkdirp.sync(path.dirname(workerLogPath)); 30 | 31 | winston.loggers.add('worker', { 32 | console: { 33 | colorize: true, 34 | label: 'worker' 35 | }, 36 | file: { 37 | filename: workerLogPath, 38 | json: false 39 | } 40 | }); 41 | 42 | module.exports = function (area) { 43 | return winston.loggers.get(area); 44 
/**
 * Defines an enumerable, read-only property `name` with `value` on `obj`.
 * The property stays configurable, so it can still be redefined or deleted.
 */
function defineConstField(obj, name, value) {
    Object.defineProperty(obj, name, {
        value: value,
        writable: false,
        enumerable: true,
        configurable: true
    });
}

// SiteStatus: numeric status codes of a site.
var SiteStatus = {};
defineConstField(SiteStatus, 'Added', 0);
defineConstField(SiteStatus, 'Processed', 1);
defineConstField(SiteStatus, 'ProcessedWithProblems', 2);
defineConstField(SiteStatus, 'Processing', 3);
defineConstField(SiteStatus, 'ConnectionProblem', 4);
defineConstField(SiteStatus, 'RobotsTxtProblem', 5);

// PageStatus: numeric status codes of a single page.
var PageStatus = {};
defineConstField(PageStatus, 'Unprocessed', 0);
defineConstField(PageStatus, 'Processed', 1);
defineConstField(PageStatus, 'Error', 2);
defineConstField(PageStatus, 'UnprocessedBecauseOfRobotsTxt', 3);
defineConstField(PageStatus, 'Binary', 4);
defineConstField(PageStatus, 'Processing', 5);

/**
 * A page of a site. All scalar fields start as null; `linksTo` starts empty.
 */
function Page() {
    var self = this;

    this.id = null;
    this.url = null;
    this.title = null;
    this.distanceFromRoot = null;
    this.httpStatus = null;
    this.status = null;
    this.linksTo = [];

    /**
     * Returns a new Page with the same scalar fields.
     * `linksTo` is not copied; the clone starts with an empty list.
     * NOTE(review): presumably intentional - confirm against callers.
     */
    this.clone = function () {
        var clonedPage = new Page();
        clonedPage.id = self.id;
        clonedPage.url = self.url;
        clonedPage.title = self.title;
        clonedPage.distanceFromRoot = self.distanceFromRoot;
        clonedPage.httpStatus = self.httpStatus;
        clonedPage.status = self.status;

        return clonedPage;
    };
}

/**
 * Summary information about a site; a new instance has status `Added`,
 * zero counters, refresh enabled and `statusTime` set to the creation time.
 */
function SiteInfo() {
    this.domain = null;
    this.progress = 0;
    this.status = SiteStatus.Added;
    this.statusDescription = null;
    this.statusTime = new Date();
    this.pageCount = 0;
    this.linkCount = 0;
    this.refreshEnabled = true;
}

/**
 * Returns false for null/undefined, otherwise the result of validator.isFQDN.
 */
SiteInfo.isValidDomain = function (domain) {
    if (domain == null) {
        return false;
    }

    return validator.isFQDN(domain);
};

/**
 * Extracts a lower-cased host from user input such as a URL or bare domain.
 *
 * Surrounding spaces and slashes are removed, a leading "scheme://" is
 * dropped, and everything from the first '/', '?' or '#' onwards is cut.
 * Returns null for falsy input.
 *
 * A "://" is treated as a scheme separator only when nothing path-like
 * precedes it, so a URL embedded in a path or query string
 * ("example.com/r?next=http://other.org") no longer replaces the real host.
 */
SiteInfo.normalizeDomain = function (domain) {
    if (!domain) {
        return null;
    }

    // Linear-time equivalent of _.trim(domain, ' /').
    var text = String(domain);
    var start = 0;
    var end = text.length;
    while (start < end && (text[start] === ' ' || text[start] === '/')) {
        start++;
    }
    while (end > start && (text[end - 1] === ' ' || text[end - 1] === '/')) {
        end--;
    }

    var host = text.substring(start, end).toLowerCase();

    // The first delimiter of a real scheme separator is its own first slash.
    var schemeEnd = host.indexOf('://');
    if (schemeEnd !== -1 && host.search(/[\/?#]/) === schemeEnd + 1) {
        host = host.substring(schemeEnd + 3);
    }

    var hostEnd = host.search(/[\/?#]/);
    if (hostEnd !== -1) {
        host = host.substring(0, hostEnd);
    }

    return host;
};

/**
 * Contents of a site: the list of its pages.
 */
function SiteContents() {
    this.pages = [];
}

/**
 * A site: its info together with its contents.
 */
function Site() {
    this.info = new SiteInfo();
    this.contents = new SiteContents();
}
"^0.19.0", 22 | "compression": "^1.6.0", 23 | "cookie-parser": "~1.3.5", 24 | "debug": "~2.2.0", 25 | "del": "^2.0.2", 26 | "express": "~4.13.1", 27 | "file-stream-rotator": "0.0.6", 28 | "gulp": "^3.9.0", 29 | "gulp-concat": "^2.6.0", 30 | "gulp-minify-css": "^1.2.1", 31 | "gulp-rename": "^1.2.2", 32 | "gulp-uglify": "^1.4.1", 33 | "iconv-lite": "^0.4.13", 34 | "jschardet": "^1.3.0", 35 | "less-middleware": "1.0.x", 36 | "lodash": "^3.10.1", 37 | "merge-stream": "^1.0.0", 38 | "mkdirp": "^0.5.1", 39 | "mongoose": "^4.1.8", 40 | "morgan": "^1.6.1", 41 | "nconf": "^0.8.0", 42 | "request": "^2.62.0", 43 | "requestretry": "^1.5.0", 44 | "robots": "^0.9.4", 45 | "run-sequence": "^1.1.4", 46 | "serve-favicon": "~2.3.0", 47 | "swig": "^1.4.2", 48 | "url": "^0.11.0", 49 | "validator": "^4.0.6", 50 | "winston": "^1.0.2" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pm2-apps.json: -------------------------------------------------------------------------------- 1 | { 2 | "apps" : [{ 3 | "name" : "web", 4 | "script" : "web", 5 | "exec_mode" : "cluster", 6 | "instances" : 0, 7 | "watch" : false, 8 | "env": { 9 | "NODE_ENV" : "production" 10 | }, 11 | "post_update" : ["echo Web has been updated, running gulp build...", 12 | "gulp build", 13 | "echo Web is being restarted now"] 14 | }, { 15 | "name" : "worker", 16 | "script" : "worker", 17 | "exec_mode" : "cluster", 18 | "instances" : 0, 19 | "watch" : false, 20 | "env": { 21 | "NODE_ENV" : "production" 22 | } 23 | }] 24 | } -------------------------------------------------------------------------------- /scripts/update-site-mapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ~/apps/site-mapper 4 | git pull 5 | gulp build 6 | pm2 restart all 7 | cd - 8 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/.gitignore: 
-------------------------------------------------------------------------------- 1 | node_modules/* 2 | *.log 3 | *~ 4 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "0.10" 4 | - "0.12" 5 | services: mongodb 6 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/README.md: -------------------------------------------------------------------------------- 1 | # mongodb-queue # 2 | 3 | [![Build Status](https://travis-ci.org/chilts/mongodb-queue.png)](https://travis-ci.org/chilts/mongodb-queue) [![NPM](https://nodei.co/npm/mongodb-queue.png?mini=true)](https://nodei.co/npm/mongodb-queue/) 4 | 5 | A really light-weight way to create queue with a nice API if you're already 6 | using MongoDB. 7 | 8 | ## Synopsis ## 9 | 10 | Create a connection to your MongoDB database, and use it to create a queue object: 11 | 12 | ```js 13 | var mongodb = require('mongodb') 14 | var mongoDbQueue = require('mongodb-queue') 15 | 16 | var con = 'mongodb://localhost:27017/test' 17 | 18 | mongodb.MongoClient.connect(con, function(err, db) { 19 | var queue = mongoDbQueue(db, 'my-queue') 20 | }) 21 | ``` 22 | 23 | Add a message to a queue: 24 | 25 | ```js 26 | queue.add('Hello, World!', function(err, id) { 27 | // Message with payload 'Hello, World!' added. 28 | // 'id' is returned, useful for logging. 29 | }) 30 | ``` 31 | 32 | Get a message from the queue: 33 | 34 | ```js 35 | queue.get(function(err, msg) { 36 | console.log('msg.id=' + msg.id) 37 | console.log('msg.ack=' + msg.ack) 38 | console.log('msg.payload=' + msg.payload) // 'Hello, World!' 
39 | console.log('msg.tries=' + msg.tries) 40 | }) 41 | ``` 42 | 43 | Ping a message to keep its visibility open for long-running tasks: 44 | 45 | ```js 46 | queue.ping(msg.ack, function(err, id) { 47 | // Visibility window now increased for this message id. 48 | // 'id' is returned, useful for logging. 49 | }) 50 | ``` 51 | 52 | Ack a message (and remove it from the queue): 53 | 54 | ```js 55 | queue.ack(msg.ack, function(err, id) { 56 | // This msg removed from queue for this ack. 57 | // The 'id' of the message is returned, useful for logging. 58 | }) 59 | ``` 60 | 61 | By default, all old messages - even processed ones - are left in MongoDB. This is so that 62 | you can go and analyse them if you want. However, you can call the following function 63 | to remove processed messages: 64 | 65 | ```js 66 | queue.clean(function(err) { 67 | // All processed (ie. acked) messages have been deleted 68 | }) 69 | ``` 70 | 71 | And if you haven't already, you should call this to make sure indexes have 72 | been added in MongoDB. Of course, if you've called this once (in some kind of 73 | one-off script) you don't need to call it in your program. Of course, check 74 | the changelog to see if you need to update them with new releases: 75 | 76 | ```js 77 | queue.ensureIndexes(function(err) { 78 | // The indexes needed have been added to MongoDB. 79 | }) 80 | ``` 81 | 82 | ## Creating a Queue ## 83 | 84 | To create a queue, call the exported function with the `MongoClient`, the name 85 | and a set of opts. The MongoDB collection used has the same name as the name 86 | passed in: 87 | 88 | ``` 89 | var mongoDbQueue = require('mongodb-queue') 90 | 91 | // an instance of a queue 92 | var queue1 = mongoDbQueue(db, 'a-queue') 93 | // another queue which uses the same collection as above 94 | var queue2 = mongoDbQueue(db, 'a-queue') 95 | ``` 96 | 97 | Note: but don't use the same queue name twice with different options, otherwise things might get confusing. 
98 | 99 | To pass options, try this: 100 | 101 | ``` 102 | var resizeQueue = mongoDbQueue(db, 'resize-queue', { visibility : 30, delay : 15 }) 103 | ``` 104 | 105 | ## Options ## 106 | 107 | ### name ### 108 | 109 | This is the name of the MongoDB Collection you wish to use to store the messages. 110 | Each queue you create will be its own collection. 111 | 112 | e.g. 113 | 114 | ``` 115 | var resizeQueue = mongoDbQueue(db, 'resize-queue') 116 | var notifyQueue = mongoDbQueue(db, 'notify-queue') 117 | ``` 118 | 119 | This will create two collections in MongoDB called `resize-queue` and `notify-queue`. 120 | 121 | ### visibility - Message Visibility Window ### 122 | 123 | Default: `30` 124 | 125 | By default, if you don't ack a message within the first 30s after receiving it, 126 | it is placed back in the queue so it can be fetched again. This is called the 127 | visibility window. 128 | 129 | You may set this visibility window on a per queue basis. For example, to set the 130 | visibility to 15 seconds: 131 | 132 | ``` 133 | var queue = mongoDbQueue(db, 'queue', { visibility : 15 }) 134 | ``` 135 | 136 | All messages in this queue now have a visibility window of 15s, instead of the 137 | default 30s. 138 | 139 | ### delay - Delay Messages on Queue ### 140 | 141 | Default: `0` 142 | 143 | When a message is added to a queue, it is immediately available for retrieval. 144 | However, there are times when you might like to delay messages coming off a queue. 145 | ie. if you set delay to be `10`, then every message will only be available for 146 | retrieval 10s after being added. 147 | 148 | To delay all messages by 10 seconds, try this: 149 | 150 | ``` 151 | var queue = mongoDbQueue(db, 'queue', { delay : 10 }) 152 | ``` 153 | 154 | This is now the default for every message added to the queue. 
155 | 156 | ### deadQueue - Dead Message Queue ### 157 | 158 | Default: none 159 | 160 | Messages that have been retried over `maxRetries` will be pushed to this queue so you can 161 | automatically see problem messages. 162 | 163 | Pass in a queue (that you created) onto which these messages will be pushed: 164 | 165 | ```js 166 | var deadQueue = mongoDbQueue(db, 'dead-queue') 167 | var queue = mongoDbQueue(db, 'queue', { deadQueue : deadQueue }) 168 | ``` 169 | 170 | If you pop a message off the `queue` over `maxRetries` times and have still not acked it, 171 | it will be pushed onto the `deadQueue` for you. This happens when you `.get()` (not when 172 | you miss acking a message in its visibility window). By doing it when you call `.get()`, 173 | the unprocessed message will be received, pushed to the `deadQueue`, acked off the normal 174 | queue and `.get()` will check for new messages prior to returning you one (or none). 175 | 176 | ### maxRetries - Maximum Retries per Message ### 177 | 178 | Default: 5 179 | 180 | This option only comes into effect if you pass in a `deadQueue` as shown above. What this 181 | means is that if an item is popped off the queue `maxRetries` times (e.g. 5) and not acked, 182 | it will be moved to this `deadQueue` the next time it is tried to pop off. You can poll your 183 | `deadQueue` for dead messages much like you can poll your regular queues. 184 | 185 | The payloads of the messages in the dead queue are the entire messages returned when `.get()`ing 186 | them from the original queue. 187 | 188 | e.g. 
189 | 190 | Given this message: 191 | 192 | ``` 193 | msg = { 194 | id: '533b1eb64ee78a57664cc76c', 195 | ack: 'c8a3cc585cbaaacf549d746d7db72f69', 196 | payload: 'Hello, World!', 197 | tries: 1 198 | } 199 | ``` 200 | 201 | If it is still not acked after `maxRetries` tries, then when you receive this same message 202 | from the `deadQueue`, it may look like this: 203 | 204 | ``` 205 | msg = { 206 | id: '533b1ecf3ca3a76b667671ef', 207 | ack: '73872b204e3f7be84050a1ce82c5c9c0', 208 | payload: { 209 | id: '533b1eb64ee78a57664cc76c', 210 | ack: 'c8a3cc585cbaaacf549d746d7db72f69', 211 | payload: 'Hello, World!', 212 | tries: 6 213 | }, 214 | tries: 1 215 | } 216 | ``` 217 | 218 | Notice that the payload from the `deadQueue` is the same as the original message 219 | when it was on the original queue (except with the number of tries set to 6, since a message is moved once its tries exceed `maxRetries`). 220 | 221 | ## Operations ## 222 | 223 | ### .add() ### 224 | 225 | You can add a string to the queue: 226 | 227 | ```js 228 | queue.add('Hello, World!', function(err, id) { 229 | // Message with payload 'Hello, World!' added. 230 | // 'id' is returned, useful for logging. 231 | }) 232 | ``` 233 | 234 | Or add an object of your choosing: 235 | 236 | ```js 237 | queue.add({ err: 'E_BORKED', msg: 'Broken' }, function(err, id) { 238 | // Message with payload { err: 'E_BORKED', msg: 'Broken' } added. 239 | // 'id' is returned, useful for logging. 240 | }) 241 | ``` 242 | 243 | You can delay individual messages from being visible by passing the `delay` option: 244 | 245 | ```js 246 | queue.add('Later', { delay: 120 }, function(err, id) { 247 | // Message with payload 'Later' added. 248 | // 'id' is returned, useful for logging. 249 | // This message won't be available for getting for 2 mins.
250 | }) 251 | ``` 252 | 253 | ### .get() ### 254 | 255 | Retrieve a message from the queue: 256 | 257 | ```js 258 | queue.get(function(err, msg) { 259 | // You can now process the message 260 | }) 261 | ``` 262 | 263 | You can choose the visibility of an individual retrieved message by passing the `visibility` option: 264 | 265 | ```js 266 | queue.get({ visibility: 10 }, function(err, msg) { 267 | // You can now process the message for 10s before it goes back into the queue if not ack'd instead of the duration that is set on the queue in general 268 | }) 269 | ``` 270 | 271 | Message will have the following structure: 272 | 273 | ```js 274 | { 275 | id: '533b1eb64ee78a57664cc76c', // ID of the message 276 | ack: 'c8a3cc585cbaaacf549d746d7db72f69', // ID for ack and ping operations 277 | payload: 'Hello, World!', // Payload passed when the message was addded 278 | tries: 1 // Number of times this message has been retrieved from queue without being ack'd 279 | } 280 | ``` 281 | 282 | ### .ack() ### 283 | 284 | After you have received an item from a queue and processed it, you can delete it 285 | by calling `.ack()` with the unique `ackId` returned: 286 | 287 | ```js 288 | queue.get(function(err, msg) { 289 | queue.ack(msg.ack, function(err, id) { 290 | // this message has now been removed from the queue 291 | }) 292 | }) 293 | ``` 294 | 295 | ### .ping() ### 296 | 297 | After you have received an item from a queue and you are taking a while 298 | to process it, you can `.ping()` the message to tell the queue that you are 299 | still alive and continuing to process the message: 300 | 301 | ```js 302 | queue.get(function(err, msg) { 303 | queue.ping(msg.ack, function(err, id) { 304 | // this message has had it's visibility window extended 305 | }) 306 | }) 307 | ``` 308 | 309 | You can also choose the visibility time that gets added by the ping operation by passing the `visibility` option: 310 | 311 | ```js 312 | queue.get(function(err, msg) { 313 | 
queue.ping(msg.ack, { visibility: 10 }, function(err, id) { 314 | // this message has had its visibility window extended by 10s instead of the visibility set on the queue in general 315 | }) 316 | }) 317 | ``` 318 | 319 | ### .total() ### 320 | 321 | Returns the total number of messages that have ever been in the queue, including 322 | all current messages: 323 | 324 | ```js 325 | queue.total(function(err, count) { 326 | console.log('This queue has seen %d messages', count) 327 | }) 328 | ``` 329 | 330 | ### .size() ### 331 | 332 | Returns the total number of messages that are waiting in the queue. 333 | 334 | ```js 335 | queue.size(function(err, count) { 336 | console.log('This queue has %d current messages', count) 337 | }) 338 | ``` 339 | 340 | ### .inFlight() ### 341 | 342 | Returns the total number of messages that are currently in flight, i.e. those that 343 | have been received but not yet acked: 344 | 345 | ```js 346 | queue.inFlight(function(err, count) { 347 | console.log('A total of %d messages are currently being processed', count) 348 | }) 349 | ``` 350 | 351 | ### .done() ### 352 | 353 | Returns the total number of messages that have been processed correctly in the 354 | queue: 355 | 356 | ```js 357 | queue.done(function(err, count) { 358 | console.log('This queue has processed %d messages', count) 359 | }) 360 | ``` 361 | 362 | ### .clean() ### 363 | 364 | Deletes all processed messages from the queue. Of course, you can leave these hanging around 365 | if you wish, but delete them if you no longer need them.
Perhaps do this using `setInterval` 366 | for a regular cleaning: 367 | 368 | ```js 369 | queue.clean(function(err) { 370 | console.log('The processed messages have been deleted from the queue') 371 | }) 372 | ``` 373 | 374 | ### Notes about Numbers ### 375 | 376 | If you add up `.size() + .inFlight() + .done()` then you should get `.total()` 377 | but this will only be approximate since these are different operations hitting the database 378 | at slightly different times. Hence, a message or two might be counted twice or not at all 379 | depending on message turnover at any one time. You should not rely on these numbers for 380 | anything; they are included only as approximations at any point in time. 381 | 382 | ## Use of MongoDB ## 383 | 384 | Whilst using MongoDB recently and having a need for lightweight queues, I realised 385 | that the atomic operations that MongoDB provides are ideal for this kind of job. 386 | 387 | Since everything is atomic, it is impossible to lose messages in or around your 388 | application. I guess MongoDB could lose them but it's a safer bet it won't compared 389 | to your own application. 390 | 391 | As an example of the atomic nature being used, messages stay in the same collection 392 | and are never moved around or deleted, just a couple of fields are set, incremented 393 | or deleted. We always use MongoDB's excellent `collection.findAndModify()` so that 394 | each message is updated atomically inside MongoDB and we never have to fetch something, 395 | change it and store it back. 396 | 397 | ## Note on MongoDB Version ## 398 | 399 | When using MongoDB v2.6 and the v1.3.23 version of the mongodb driver from npm, I was getting 400 | a weird error similar to "key $exists must not start with '$'". Yes, very strange. Anyway, the fix 401 | is to install a later version of the driver. I have tried this with v1.4.9 and it seems ok. 402 | 403 | ## Releases ## 404 | 405 | Yay! We made it to v1.0.
This means that development may slow down but to be honest, I have pretty 406 | much all of the functionality I want in this thing done. Thanks to everyone for feedback, reports 407 | and pull requests. 408 | 409 | ### 1.0.0 (2014-10-30) ### 410 | 411 | * [NEW] Ability to specify a visibility window when getting a message (thanks https://github.com/Gertt) 412 | 413 | ### 0.9.1 (2014-08-28) ### 414 | 415 | * [NEW] Added .clean() method to remove old (processed) messages 416 | * [NEW] Add 'delay' option to queue.add() so individual messages can be delayed separately 417 | * [TEST] Test individual 'delay' option for each message 418 | 419 | ### 0.7.0 (2014-03-24) ### 420 | 421 | * [FIX] Fix .ping() so only visible/non-deleted messages can be pinged 422 | * [FIX] Fix .ack() so only visible/non-deleted messages can be acked 423 | * [TEST] Add test to make sure messages can't be acked twice 424 | * [TEST] Add test to make sure an acked message can't be pinged 425 | * [INTERNAL] Slight function name changes, nicer date routines 426 | 427 | ### 0.6.0 (2014-03-22) ### 428 | 429 | * [NEW] The msg.id is now returned on successful Queue.ping() and Queue.ack() calls 430 | * [NEW] Call queue.ensureIndexes(callback) to create the indexes 431 | * [CHANGE] When a message is acked, 'deleted' is now set to the current time (not true) 432 | * [CHANGE] The queue is now created synchronously 433 | 434 | ### 0.5.0 (2014-03-21) ### 435 | 436 | * [NEW] Now adds two indexes onto the MongoDB collection used for the message 437 | * [CHANGE] The queue is now created by calling the async exported function 438 | * [DOC] Update to show how the queues are now created 439 | 440 | ### 0.4.0 (2014-03-20) ### 441 | 442 | * [NEW] Ability to ping retrieved messages a. la.
'still alive' and 'extend visibility' 443 | * [CHANGE] Removed ability to have different queues in the same collection 444 | * [CHANGE] All queues are now stored in their own collection 445 | * [CHANGE] When acking a message, only need ack (no longer need id) 446 | * [TEST] Added test for pinged messages 447 | * [DOC] Update to specify each queue will create it's own MongoDB collection 448 | * [DOC] Added docs for option `delay` 449 | * [DOC] Added synopsis for Queue.ping() 450 | * [DOC] Removed use of msg.id when calling Queue.ack() 451 | 452 | ### 0.3.1 (2014-03-19) ### 453 | 454 | * [DOC] Added documentation for the `delay` option 455 | 456 | ### 0.3.0 (2014-03-19) ### 457 | 458 | * [NEW] Return the message id when added to a queue 459 | * [NEW] Ability to set a default delay on all messages in a queue 460 | * [FIX] Make sure old messages (outside of visibility window) aren't deleted when acked 461 | * [FIX] Internal: Fix `queueName` 462 | * [TEST] Added test for multiple messages 463 | * [TEST] Added test for delayed messages 464 | 465 | ### 0.2.1 (2014-03-19) ### 466 | 467 | * [FIX] Fix when getting messages off an empty queue 468 | * [NEW] More Tests 469 | 470 | ### 0.2.0 (2014-03-18) ### 471 | 472 | * [NEW] messages now return number of tries (times they have been fetched) 473 | 474 | ### 0.1.0 (2014-03-18) ### 475 | 476 | * [NEW] add messages to queues 477 | * [NEW] fetch messages from queues 478 | * [NEW] ack messages on queues 479 | * [NEW] set up multiple queues 480 | * [NEW] set your own MongoDB Collection name 481 | * [NEW] set a visibility timeout on a queue 482 | 483 | ## Author ## 484 | 485 | Written by [Andrew Chilton](http://chilts.org/) - 486 | [Twitter](https://twitter.com/andychilton). 
487 | 488 | ## License ## 489 | 490 | MIT - http://chilts.mit-license.org/2014/ 491 | 492 | (Ends) 493 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/mongodb-queue.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * mongodb-queue.js - Use your existing MongoDB as a local queue. 4 | * 5 | * Copyright (c) 2014 Andrew Chilton 6 | * - http://chilts.org/ 7 | * - andychilton@gmail.com 8 | * 9 | * License: http://chilts.mit-license.org/2014/ 10 | * 11 | **/ 12 | 13 | var crypto = require('crypto') 14 | 15 | // some helper functions 16 | function id() { 17 | return crypto.randomBytes(16).toString('hex') 18 | } 19 | 20 | function now() { 21 | return (new Date()).toISOString() 22 | } 23 | 24 | function nowPlusSecs(secs) { 25 | return (new Date(Date.now() + secs * 1000)).toISOString() 26 | } 27 | 28 | module.exports = function(mongoDbClient, name, opts) { 29 | return new Queue(mongoDbClient, name, opts) 30 | } 31 | 32 | // the Queue object itself 33 | function Queue(mongoDbClient, name, opts) { 34 | if ( !mongoDbClient ) { 35 | throw new Error("mongodb-queue: provide a mongodb.MongoClient") 36 | } 37 | if ( !name ) { 38 | throw new Error("mongodb-queue: provide a queue name") 39 | } 40 | opts = opts || {} 41 | 42 | this.name = name 43 | this.col = mongoDbClient.collection(name) 44 | this.visibility = opts.visibility || 30 45 | this.delay = opts.delay || 0 46 | 47 | if ( opts.deadQueue ) { 48 | this.deadQueue = opts.deadQueue 49 | this.maxRetries = opts.maxRetries || 5 50 | } 51 | } 52 | 53 | Queue.prototype.ensureIndexes = function(callback) { 54 | var self = this 55 | 56 | self.col.ensureIndex({ deleted : 1, visible : 1, _id : 1 }, function(err) { 57 | if (err) return callback(err) 58 | self.col.ensureIndex({ ack : 1 }, { unique : true, sparse : true }, function(err) { 59 | if (err) return callback(err) 60 | callback() 61 | }) 62 | }) 63 | } 64 | 65 
/**
 * Add a message to the queue.
 *
 * @param {*} payload - Value stored as the message payload.
 * @param {Object} [opts] - Per-message options.
 * @param {number} [opts.delay] - Seconds before the message becomes visible.
 *     When omitted, the queue-wide `delay` is used. An explicit `0` means
 *     "no delay", even if the queue has a non-zero default delay.
 * @param {Function} callback - Called as `callback(err, id)`; `id` is the
 *     inserted document's `_id` converted to a string.
 */
Queue.prototype.add = function(payload, opts, callback) {
    var self = this
    // `opts` is optional: add(payload, callback)
    if ( !callback ) {
        callback = opts
        opts = {}
    }

    // Only fall back to the queue default when no per-message delay was given,
    // so that `{ delay : 0 }` can override a non-zero queue default.
    var delay = opts.delay != null ? opts.delay : self.delay
    var msg = {
        visible  : delay ? nowPlusSecs(delay) : now(),
        payload  : payload,
    }

    self.col.insert(msg, function(err, results) {
        if (err) return callback(err)
        // the inserted document is read from `results.ops`
        callback(null, '' + results.ops[0]._id)
    })
}

/**
 * Fetch the oldest visible, non-deleted message and hide it for the
 * visibility window.
 *
 * @param {Object} [opts] - Per-call options.
 * @param {number} [opts.visibility] - Seconds the fetched message stays hidden;
 *     falls back to the queue-wide `visibility`.
 * @param {Function} callback - Called as `callback(err, msg)`. `msg` has the
 *     shape `{ id, ack, payload, tries }`, or is an empty object `{}` when no
 *     message is currently available.
 */
Queue.prototype.get = function(opts, callback) {
    var self = this
    // `opts` is optional: get(callback)
    if ( !callback ) {
        callback = opts
        opts = {}
    }

    var visibility = opts.visibility || self.visibility
    var query = {
        visible : { $lt : now() },
        deleted : { $exists : false },
    }
    var sort = {
        _id : 1
    }
    var update = {
        $inc : { tries : 1 },
        $set : {
            ack     : id(),
            visible : nowPlusSecs(visibility),
        }
    }

    self.col.findAndModify(query, sort, update, { new : true }, function(err, result) {
        if (err) return callback(err)
        // nothing matched: report "no message" as an empty object
        if ( !result || !result.value ) return callback(null, {})

        // convert to an external representation ('_id' becomes an 'id' string)
        var msg = {
            id      : '' + result.value._id,
            ack     : result.value.ack,
            payload : result.value.payload,
            tries   : result.value.tries,
        }

        // Without a deadQueue the number of tries is never checked.
        if ( self.deadQueue && msg.tries > self.maxRetries ) {
            // 1) add this message to the deadQueue
            // 2) ack this message from the regular queue
            // 3) call ourself to return a new message (if one exists), keeping
            //    the caller's options so the requested visibility still applies
            self.deadQueue.add(msg, function(err) {
                if (err) return callback(err)
                self.ack(msg.ack, function(err) {
                    if (err) return callback(err)
                    self.get(opts, callback)
                })
            })
            return
        }

        callback(null, msg)
    })
}
| 142 | Queue.prototype.ping = function(ack, opts, callback) { 143 | var self = this 144 | if ( !callback ) { 145 | callback = opts 146 | opts = {} 147 | } 148 | 149 | var visibility = opts.visibility || self.visibility 150 | var query = { 151 | ack : ack, 152 | visible : { $gt : now() }, 153 | deleted : { $exists : false }, 154 | } 155 | var update = { 156 | $set : { 157 | visible : nowPlusSecs(visibility) 158 | } 159 | } 160 | self.col.findAndModify(query, undefined, update, { new : true }, function(err, msg, blah) { 161 | if (err) return callback(err) 162 | if ( !msg ) { 163 | return callback(new Error("Queue.ping(): Unidentified ack : " + ack)) 164 | } 165 | callback(null, '' + msg.value._id) 166 | }) 167 | } 168 | 169 | Queue.prototype.ack = function(ack, callback) { 170 | var self = this 171 | 172 | var query = { 173 | ack : ack, 174 | visible : { $gt : now() }, 175 | deleted : { $exists : false }, 176 | } 177 | var update = { 178 | $set : { 179 | deleted : now(), 180 | } 181 | } 182 | self.col.findAndModify(query, undefined, update, { new : true }, function(err, msg, blah) { 183 | if (err) return callback(err) 184 | if ( !msg.value ) { 185 | return callback(new Error("Queue.ack(): Unidentified ack : " + ack)) 186 | } 187 | callback(null, '' + msg.value._id) 188 | }) 189 | } 190 | 191 | Queue.prototype.clean = function(callback) { 192 | var self = this 193 | 194 | var query = { 195 | deleted : { $exists : true }, 196 | } 197 | 198 | self.col.remove(query, callback) 199 | } 200 | 201 | Queue.prototype.total = function(callback) { 202 | var self = this 203 | 204 | self.col.count(function(err, count) { 205 | if (err) return callback(err) 206 | callback(null, count) 207 | }) 208 | } 209 | 210 | Queue.prototype.size = function(callback) { 211 | var self = this 212 | 213 | var query = { 214 | visible : { $lt : now() }, 215 | deleted : { $exists : false }, 216 | } 217 | 218 | self.col.count(query, function(err, count) { 219 | if (err) return callback(err) 220 | 
callback(null, count) 221 | }) 222 | } 223 | 224 | Queue.prototype.inFlight = function(callback) { 225 | var self = this 226 | 227 | var query = { 228 | visible : { $gt : now() }, 229 | ack : { $exists : true }, 230 | deleted : { $exists : false }, 231 | } 232 | 233 | self.col.count(query, function(err, count) { 234 | if (err) return callback(err) 235 | callback(null, count) 236 | }) 237 | } 238 | 239 | Queue.prototype.done = function(callback) { 240 | var self = this 241 | 242 | var query = { 243 | deleted : { $exists : true }, 244 | } 245 | 246 | self.col.count(query, function(err, count) { 247 | if (err) return callback(err) 248 | callback(null, count) 249 | }) 250 | } 251 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mongodb-queue", 3 | "version": "1.0.1", 4 | "description": "Message queues which uses MongoDB.", 5 | "main": "mongodb-queue.js", 6 | "scripts": { 7 | "test": "set -e; for FILE in test/*.js; do node $FILE; done" 8 | }, 9 | "dependencies": {}, 10 | "devDependencies": { 11 | "tape": "^2.14.0", 12 | "mongodb": "^1.4.9", 13 | "async": "^0.9.0" 14 | }, 15 | "homepage": "https://github.com/chilts/mongodb-queue", 16 | "repository": { 17 | "type": "git", 18 | "url": "git://github.com/chilts/mongodb-queue.git" 19 | }, 20 | "bugs": { 21 | "url": "http://github.com/chilts/mongodb-queue/issues", 22 | "mail": "andychilton@gmail.com" 23 | }, 24 | "author": { 25 | "name": "Andrew Chilton", 26 | "email": "andychilton@gmail.com", 27 | "url": "http://chilts.org/" 28 | }, 29 | "license": "MIT", 30 | "keywords": [ 31 | "mongodb", 32 | "queue" 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/test/clean.js: -------------------------------------------------------------------------------- 1 | var 
async = require('async') 2 | var test = require('tape') 3 | 4 | var setup = require('./setup.js') 5 | var mongoDbQueue = require('../') 6 | 7 | setup(function(db) { 8 | 9 | test('clean: check deleted messages are deleted', function(t) { 10 | var queue = mongoDbQueue(db, 'clean', { visibility : 3 }) 11 | var msg 12 | 13 | async.series( 14 | [ 15 | function(next) { 16 | queue.size(function(err, size) { 17 | t.ok(!err, 'There is no error.') 18 | t.equal(size, 0, 'There is currently nothing on the queue') 19 | next() 20 | }) 21 | }, 22 | function(next) { 23 | queue.total(function(err, size) { 24 | t.ok(!err, 'There is no error.') 25 | t.equal(size, 0, 'There is currently nothing in the queue at all') 26 | next() 27 | }) 28 | }, 29 | function(next) { 30 | queue.clean(function(err) { 31 | t.ok(!err, 'There is no error.') 32 | next() 33 | }) 34 | }, 35 | function(next) { 36 | queue.size(function(err, size) { 37 | t.ok(!err, 'There is no error.') 38 | t.equal(size, 0, 'There is currently nothing on the queue') 39 | next() 40 | }) 41 | }, 42 | function(next) { 43 | queue.total(function(err, size) { 44 | t.ok(!err, 'There is no error.') 45 | t.equal(size, 0, 'There is currently nothing in the queue at all') 46 | next() 47 | }) 48 | }, 49 | function(next) { 50 | queue.add('Hello, World!', function(err) { 51 | t.ok(!err, 'There is no error when adding a message.') 52 | next() 53 | }) 54 | }, 55 | function(next) { 56 | queue.clean(function(err) { 57 | t.ok(!err, 'There is no error.') 58 | next() 59 | }) 60 | }, 61 | function(next) { 62 | queue.size(function(err, size) { 63 | t.ok(!err, 'There is no error.') 64 | t.equal(size, 1, 'Queue size is correct') 65 | next() 66 | }) 67 | }, 68 | function(next) { 69 | queue.total(function(err, size) { 70 | t.ok(!err, 'There is no error.') 71 | t.equal(size, 1, 'Queue total is correct') 72 | next() 73 | }) 74 | }, 75 | function(next) { 76 | queue.get(function(err, newMsg) { 77 | msg = newMsg 78 | t.ok(msg.id, 'Got a msg.id (sanity check)') 
79 | next() 80 | }) 81 | }, 82 | function(next) { 83 | queue.size(function(err, size) { 84 | t.ok(!err, 'There is no error.') 85 | t.equal(size, 0, 'Queue size is correct') 86 | next() 87 | }) 88 | }, 89 | function(next) { 90 | queue.total(function(err, size) { 91 | t.ok(!err, 'There is no error.') 92 | t.equal(size, 1, 'Queue total is correct') 93 | next() 94 | }) 95 | }, 96 | function(next) { 97 | queue.clean(function(err) { 98 | t.ok(!err, 'There is no error.') 99 | next() 100 | }) 101 | }, 102 | function(next) { 103 | queue.size(function(err, size) { 104 | t.ok(!err, 'There is no error.') 105 | t.equal(size, 0, 'Queue size is correct') 106 | next() 107 | }) 108 | }, 109 | function(next) { 110 | queue.total(function(err, size) { 111 | t.ok(!err, 'There is no error.') 112 | t.equal(size, 1, 'Queue total is correct') 113 | next() 114 | }) 115 | }, 116 | function(next) { 117 | queue.ack(msg.ack, function(err, id) { 118 | t.ok(!err, 'No error when acking the message') 119 | t.ok(id, 'Received an id when acking this message') 120 | next() 121 | }) 122 | }, 123 | function(next) { 124 | queue.size(function(err, size) { 125 | t.ok(!err, 'There is no error.') 126 | t.equal(size, 0, 'Queue size is correct') 127 | next() 128 | }) 129 | }, 130 | function(next) { 131 | queue.total(function(err, size) { 132 | t.ok(!err, 'There is no error.') 133 | t.equal(size, 1, 'Queue total is correct') 134 | next() 135 | }) 136 | }, 137 | function(next) { 138 | queue.clean(function(err) { 139 | t.ok(!err, 'There is no error.') 140 | next() 141 | }) 142 | }, 143 | function(next) { 144 | queue.size(function(err, size) { 145 | t.ok(!err, 'There is no error.') 146 | t.equal(size, 0, 'Queue size is correct') 147 | next() 148 | }) 149 | }, 150 | function(next) { 151 | queue.total(function(err, size) { 152 | t.ok(!err, 'There is no error.') 153 | t.equal(size, 0, 'Queue total is correct') 154 | next() 155 | }) 156 | }, 157 | ], 158 | function(err) { 159 | if (err) t.fail(err) 160 | 
t.pass('Finished test ok') 161 | t.end() 162 | } 163 | ) 164 | }) 165 | 166 | test('db.close()', function(t) { 167 | t.pass('db.close()') 168 | db.close() 169 | t.end() 170 | }) 171 | 172 | }) 173 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/test/dead-queue.js: -------------------------------------------------------------------------------- 1 | var async = require('async') 2 | var test = require('tape') 3 | 4 | var setup = require('./setup.js') 5 | var mongoDbQueue = require('../') 6 | 7 | setup(function(db) { 8 | 9 | test('first test', function(t) { 10 | var queue = mongoDbQueue(db, 'queue', { visibility : 3, deadQueue : 'dead-queue' }) 11 | t.ok(queue, 'Queue created ok') 12 | t.end() 13 | }); 14 | 15 | test('single message going over 5 tries, should appear on dead-queue', function(t) { 16 | var deadQueue = mongoDbQueue(db, 'dead-queue') 17 | var queue = mongoDbQueue(db, 'queue', { visibility : 1, deadQueue : deadQueue }) 18 | var msg 19 | var origId 20 | 21 | async.series( 22 | [ 23 | function(next) { 24 | queue.add('Hello, World!', function(err, id) { 25 | t.ok(!err, 'There is no error when adding a message.') 26 | t.ok(id, 'Received an id for this message') 27 | origId = id 28 | next() 29 | }) 30 | }, 31 | function(next) { 32 | queue.get(function(err, thisMsg) { 33 | setTimeout(function() { 34 | t.pass('First expiration') 35 | next() 36 | }, 2 * 1000) 37 | }) 38 | }, 39 | function(next) { 40 | queue.get(function(err, thisMsg) { 41 | setTimeout(function() { 42 | t.pass('Second expiration') 43 | next() 44 | }, 2 * 1000) 45 | }) 46 | }, 47 | function(next) { 48 | queue.get(function(err, thisMsg) { 49 | setTimeout(function() { 50 | t.pass('Third expiration') 51 | next() 52 | }, 2 * 1000) 53 | }) 54 | }, 55 | function(next) { 56 | queue.get(function(err, thisMsg) { 57 | setTimeout(function() { 58 | t.pass('Fourth expiration') 59 | next() 60 | }, 2 * 1000) 61 | }) 62 | }, 63 | function(next) 
{ 64 | queue.get(function(err, thisMsg) { 65 | setTimeout(function() { 66 | t.pass('Fifth expiration') 67 | next() 68 | }, 2 * 1000) 69 | }) 70 | }, 71 | function(next) { 72 | queue.get(function(err, id) { 73 | t.ok(!err, 'No error when getting no messages') 74 | t.ok(!msg, 'No msg received') 75 | next() 76 | }) 77 | }, 78 | function(next) { 79 | deadQueue.get(function(err, msg) { 80 | t.ok(!err, 'No error when getting from the deadQueue') 81 | t.ok(msg.id, 'Got a message id from the deadQueue') 82 | t.equal(msg.payload.id, origId, 'Got the same message id as the original message') 83 | t.equal(msg.payload.payload, 'Hello, World!', 'Got the same as the original message') 84 | t.equal(msg.payload.tries, 6, 'Got the tries as 6') 85 | next() 86 | }) 87 | }, 88 | ], 89 | function(err) { 90 | t.ok(!err, 'No error during single round-trip test') 91 | t.end() 92 | } 93 | ) 94 | }) 95 | 96 | test('two messages, with first going over 3 tries', function(t) { 97 | var deadQueue = mongoDbQueue(db, 'dead-queue-2') 98 | var queue = mongoDbQueue(db, 'queue-2', { visibility : 1, deadQueue : deadQueue, maxRetries : 3 }) 99 | var msg 100 | var origId, origId2 101 | 102 | async.series( 103 | [ 104 | function(next) { 105 | queue.add('Hello, World!', function(err, id) { 106 | t.ok(!err, 'There is no error when adding a message.') 107 | t.ok(id, 'Received an id for this message') 108 | origId = id 109 | next() 110 | }) 111 | }, 112 | function(next) { 113 | queue.add('Part II', function(err, id) { 114 | t.ok(!err, 'There is no error when adding another message.') 115 | t.ok(id, 'Received an id for this message') 116 | origId2 = id 117 | next() 118 | }) 119 | }, 120 | function(next) { 121 | queue.get(function(err, thisMsg) { 122 | t.equal(thisMsg.id, origId, 'We return the first message on first go') 123 | setTimeout(function() { 124 | t.pass('First expiration') 125 | next() 126 | }, 2 * 1000) 127 | }) 128 | }, 129 | function(next) { 130 | queue.get(function(err, thisMsg) { 131 | 
t.equal(thisMsg.id, origId, 'We return the first message on second go') 132 | setTimeout(function() { 133 | t.pass('Second expiration') 134 | next() 135 | }, 2 * 1000) 136 | }) 137 | }, 138 | function(next) { 139 | queue.get(function(err, thisMsg) { 140 | t.equal(thisMsg.id, origId, 'We return the first message on third go') 141 | setTimeout(function() { 142 | t.pass('Third expiration') 143 | next() 144 | }, 2 * 1000) 145 | }) 146 | }, 147 | function(next) { 148 | // This is the 4th time, so we SHOULD have moved it to the dead queue 149 | // pior to it being returned. 150 | queue.get(function(err, msg) { 151 | t.ok(!err, 'No error when getting the 2nd message') 152 | t.equal(msg.id, origId2, 'Got the ID of the 2nd message') 153 | t.equal(msg.payload, 'Part II', 'Got the same payload as the 2nd message') 154 | next() 155 | }) 156 | }, 157 | function(next) { 158 | deadQueue.get(function(err, msg) { 159 | t.ok(!err, 'No error when getting from the deadQueue') 160 | t.ok(msg.id, 'Got a message id from the deadQueue') 161 | t.equal(msg.payload.id, origId, 'Got the same message id as the original message') 162 | t.equal(msg.payload.payload, 'Hello, World!', 'Got the same as the original message') 163 | t.equal(msg.payload.tries, 4, 'Got the tries as 4') 164 | next() 165 | }) 166 | }, 167 | ], 168 | function(err) { 169 | t.ok(!err, 'No error during single round-trip test') 170 | t.end() 171 | } 172 | ) 173 | }) 174 | 175 | test('db.close()', function(t) { 176 | t.pass('db.close()') 177 | db.close() 178 | t.end() 179 | }) 180 | 181 | }) 182 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/test/default.js: -------------------------------------------------------------------------------- 1 | var async = require('async') 2 | var test = require('tape') 3 | 4 | var setup = require('./setup.js') 5 | var mongoDbQueue = require('../') 6 | 7 | setup(function(db) { 8 | 9 | test('first test', function(t) { 10 | 
var queue = mongoDbQueue(db, 'default') 11 | t.ok(queue, 'Queue created ok') 12 | t.end() 13 | }); 14 | 15 | test('single round trip', function(t) { 16 | var queue = mongoDbQueue(db, 'default') 17 | var msg 18 | 19 | async.series( 20 | [ 21 | function(next) { 22 | queue.add('Hello, World!', function(err, id) { 23 | t.ok(!err, 'There is no error when adding a message.') 24 | t.ok(id, 'Received an id for this message') 25 | next() 26 | }) 27 | }, 28 | function(next) { 29 | queue.get(function(err, thisMsg) { 30 | console.log(thisMsg) 31 | msg = thisMsg 32 | t.ok(msg.id, 'Got a msg.id') 33 | t.equal(typeof msg.id, 'string', 'msg.id is a string') 34 | t.ok(msg.ack, 'Got a msg.ack') 35 | t.equal(typeof msg.ack, 'string', 'msg.ack is a string') 36 | t.ok(msg.tries, 'Got a msg.tries') 37 | t.equal(typeof msg.tries, 'number', 'msg.tries is a number') 38 | t.equal(msg.tries, 1, 'msg.tries is currently one') 39 | t.equal(msg.payload, 'Hello, World!', 'Payload is correct') 40 | next() 41 | }) 42 | }, 43 | function(next) { 44 | queue.ack(msg.ack, function(err, id) { 45 | t.ok(!err, 'No error when acking the message') 46 | t.ok(id, 'Received an id when acking this message') 47 | next() 48 | }) 49 | }, 50 | ], 51 | function(err) { 52 | t.ok(!err, 'No error during single round-trip test') 53 | t.end() 54 | } 55 | ) 56 | }) 57 | 58 | test("single round trip, can't be acked again", function(t) { 59 | var queue = mongoDbQueue(db, 'default') 60 | var msg 61 | 62 | async.series( 63 | [ 64 | function(next) { 65 | queue.add('Hello, World!', function(err, id) { 66 | t.ok(!err, 'There is no error when adding a message.') 67 | t.ok(id, 'Received an id for this message') 68 | next() 69 | }) 70 | }, 71 | function(next) { 72 | queue.get(function(err, thisMsg) { 73 | msg = thisMsg 74 | t.ok(msg.id, 'Got a msg.id') 75 | t.equal(typeof msg.id, 'string', 'msg.id is a string') 76 | t.ok(msg.ack, 'Got a msg.ack') 77 | t.equal(typeof msg.ack, 'string', 'msg.ack is a string') 78 | t.ok(msg.tries, 'Got 
a msg.tries') 79 | t.equal(typeof msg.tries, 'number', 'msg.tries is a number') 80 | t.equal(msg.tries, 1, 'msg.tries is currently one') 81 | t.equal(msg.payload, 'Hello, World!', 'Payload is correct') 82 | next() 83 | }) 84 | }, 85 | function(next) { 86 | queue.ack(msg.ack, function(err, id) { 87 | t.ok(!err, 'No error when acking the message') 88 | t.ok(id, 'Received an id when acking this message') 89 | next() 90 | }) 91 | }, 92 | function(next) { 93 | queue.ack(msg.ack, function(err, id) { 94 | t.ok(err, 'There is an error when acking the message again') 95 | t.ok(!id, 'No id received when trying to ack an already deleted message') 96 | next() 97 | }) 98 | }, 99 | ], 100 | function(err) { 101 | t.ok(!err, 'No error during single round-trip when trying to double ack') 102 | t.end() 103 | } 104 | ) 105 | }) 106 | 107 | test('db.close()', function(t) { 108 | t.pass('db.close()') 109 | db.close() 110 | t.end() 111 | }) 112 | 113 | }) 114 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/test/delay.js: -------------------------------------------------------------------------------- 1 | var async = require('async') 2 | var test = require('tape') 3 | 4 | var setup = require('./setup.js') 5 | var mongoDbQueue = require('../') 6 | 7 | setup(function(db) { 8 | 9 | test('delay: check messages on this queue are returned after the delay', function(t) { 10 | var queue = mongoDbQueue(db, 'delay', { delay : 3 }) 11 | 12 | async.series( 13 | [ 14 | function(next) { 15 | queue.add('Hello, World!', function(err, id) { 16 | t.ok(!err, 'There is no error when adding a message.') 17 | t.ok(id, 'There is an id returned when adding a message.') 18 | next() 19 | }) 20 | }, 21 | function(next) { 22 | // get something now and it shouldn't be there 23 | queue.get(function(err, msg) { 24 | t.ok(!err, 'No error when getting no messages') 25 | t.ok(!msg, 'No msg received') 26 | // now wait 4s 27 | setTimeout(next, 4 * 
1000) 28 | }) 29 | }, 30 | function(next) { 31 | // get something now and it SHOULD be there 32 | queue.get(function(err, msg) { 33 | t.ok(!err, 'No error when getting a message') 34 | t.ok(msg.id, 'Got a message id now that the message delay has passed') 35 | queue.ack(msg.ack, next) 36 | }) 37 | }, 38 | function(next) { 39 | queue.get(function(err, msg) { 40 | // no more messages 41 | t.ok(!err, 'No error when getting no messages') 42 | t.ok(!msg, 'No more messages') 43 | next() 44 | }) 45 | }, 46 | ], 47 | function(err) { 48 | if (err) t.fail(err) 49 | t.pass('Finished test ok') 50 | t.end() 51 | } 52 | ) 53 | }) 54 | 55 | test('delay: check an individual message delay overrides the queue delay', function(t) { 56 | var queue = mongoDbQueue(db, 'delay') 57 | 58 | async.series( 59 | [ 60 | function(next) { 61 | queue.add('I am delayed by 3 seconds', { delay : 3 }, function(err, id) { 62 | t.ok(!err, 'There is no error when adding a message.') 63 | t.ok(id, 'There is an id returned when adding a message.') 64 | next() 65 | }) 66 | }, 67 | function(next) { 68 | // get something now and it shouldn't be there 69 | queue.get(function(err, msg) { 70 | t.ok(!err, 'No error when getting no messages') 71 | t.ok(!msg, 'No msg received') 72 | // now wait 4s 73 | setTimeout(next, 4 * 1000) 74 | }) 75 | }, 76 | function(next) { 77 | // get something now and it SHOULD be there 78 | queue.get(function(err, msg) { 79 | t.ok(!err, 'No error when getting a message') 80 | t.ok(msg.id, 'Got a message id now that the message delay has passed') 81 | queue.ack(msg.ack, next) 82 | }) 83 | }, 84 | function(next) { 85 | queue.get(function(err, msg) { 86 | // no more messages 87 | t.ok(!err, 'No error when getting no messages') 88 | t.ok(!msg, 'No more messages') 89 | next() 90 | }) 91 | }, 92 | ], 93 | function(err) { 94 | if (err) t.fail(err) 95 | t.pass('Finished test ok') 96 | t.end() 97 | } 98 | ) 99 | }) 100 | 101 | test('db.close()', function(t) { 102 | t.pass('db.close()') 103 | 
db.close() 104 | t.end() 105 | }) 106 | 107 | }) 108 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/test/indexes.js: -------------------------------------------------------------------------------- 1 | var async = require('async') 2 | var test = require('tape') 3 | 4 | var setup = require('./setup.js') 5 | var mongoDbQueue = require('../') 6 | 7 | setup(function(db) { 8 | 9 | test('visibility: check message is back in queue after 3s', function(t) { 10 | t.plan(1) 11 | 12 | var queue = mongoDbQueue(db, 'visibility', { visibility : 3 }) 13 | 14 | queue.ensureIndexes(function(err) { 15 | t.ok(!err, 'There was no error when running .ensureIndexes()') 16 | 17 | t.end() 18 | }) 19 | }) 20 | 21 | test('db.close()', function(t) { 22 | t.pass('db.close()') 23 | db.close() 24 | t.end() 25 | }) 26 | 27 | }) 28 | -------------------------------------------------------------------------------- /third_party_libs/mongodb-queue-fixed/test/multi.js: -------------------------------------------------------------------------------- 1 | var async = require('async') 2 | var test = require('tape') 3 | 4 | var setup = require('./setup.js') 5 | var mongoDbQueue = require('../') 6 | 7 | var total = 250 8 | 9 | setup(function(db) { 10 | 11 | test('multi: add ' + total + ' messages, get ' + total + ' back', function(t) { 12 | var queue = mongoDbQueue(db, 'multi') 13 | var msgs = [] 14 | 15 | async.series( 16 | [ 17 | function(next) { 18 | var i, done = 0 19 | for(i=0; i value.map(file => '/' + _.trimLeft(file, '/'))); 47 | bundles.css = _.mapValues(config.get("web:bundles:css"), 48 | (value, key) => value.map(file => '/' + _.trimLeft(file, '/'))); 49 | } 50 | else { 51 | bundles.js = _.mapValues(config.get("web:bundles:js"), 52 | (value, key) => [getAssetWithHash('/js/' + key + '.js')]); 53 | bundles.css = _.mapValues(config.get("web:bundles:css"), 54 | (value, key) => [getAssetWithHash('/css/' + key + '.css')]); 55 | } 
56 | 57 | return { bundles }; 58 | } 59 | 60 | var app = express(); 61 | var public_path = (app.get('env') === 'development') ? 'public' : 'public_dist'; 62 | 63 | // Global shared objects 64 | app.set('siteRepository', siteRepository); 65 | app.set('mappingClient', new MappingClient(siteRepository, { 66 | refreshPeriodInDays: config.get('mapper:refreshPeriodInDays') 67 | })); 68 | app.set('appConfig', config); 69 | 70 | // view engine setup 71 | app.engine('html', swig.renderFile); 72 | app.set('views', path.join(__dirname, 'views')); 73 | app.set('view engine', 'html'); 74 | 75 | swig.setDefaults({ locals: _.assign({ now: function () { return new Date(); } }, getBundles())}); 76 | 77 | if (app.get('env') === 'development') { 78 | swig.setDefaults({cache: false}); 79 | } 80 | 81 | app.use(compression()); 82 | // uncomment after placing your favicon in /public 83 | //app.use(favicon(path.join(__dirname, public_path, 'favicon.ico'))); 84 | app.use(logger('dev')); 85 | app.use(bodyParser.json()); 86 | app.use(bodyParser.urlencoded({extended: false})); 87 | app.use(cookieParser()); 88 | //app.use(require('less-middleware')(path.join(__dirname, public_path))); 89 | app.use(require('../infrastructure/accessLogger')); 90 | app.use(express.static(path.join(__dirname, public_path))); 91 | 92 | app.use('/', rootRoutes); 93 | app.use('/', mapRoutes); 94 | 95 | // error 404 96 | app.use(function (req, res, next) { 97 | res.status(404); 98 | res.render('errors/404', { 99 | url: req.path 100 | }); 101 | }); 102 | 103 | // error handler 104 | if (app.get('env') === 'development') { 105 | app.use(function (err, req, res, next) { 106 | res.status(err.status || 500); 107 | res.render('errors/500', { 108 | message: err.message, 109 | error: err 110 | }); 111 | }); 112 | } 113 | 114 | // production error handler 115 | // no stacktraces leaked to user 116 | app.use(function (err, req, res, next) { 117 | res.status(err.status || 500); 118 | res.render('errors/500', { 119 | message: 
err.message, 120 | error: {} 121 | }); 122 | }); 123 | 124 | function onExit() { 125 | if (mappingEngine && mappingEnginePromise) { 126 | mappingEngine.stop(); 127 | 128 | mappingEnginePromise 129 | .then(() => siteRepository.close()) 130 | .then(() => process.exit(0)); 131 | } 132 | } 133 | 134 | if (useWorker) { 135 | process.on('SIGINT', onExit); 136 | } 137 | 138 | module.exports = app; 139 | -------------------------------------------------------------------------------- /web/binding-models/binding-models.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var siteModels = require('../../models/site-models'); 4 | 5 | function SiteMapDataModel(site, config) { 6 | var self = this; 7 | 8 | this.domain = null; 9 | this.status = null; 10 | this.processing = false; 11 | this.contentsTimeStamp = null; 12 | this.nodes = []; 13 | this.links = []; 14 | 15 | var maxNodesToShow = (config && config.maxNodesToShow) || 200; 16 | 17 | init(site); 18 | 19 | function getFolder(fileName) { 20 | var i = fileName.lastIndexOf('/'); 21 | if (i >= 0) { 22 | return fileName.substring(0, i + 1); 23 | } 24 | else { 25 | return fileName; 26 | } 27 | } 28 | 29 | function getSiteStatus(site) 30 | { 31 | // Problem with this domain 32 | if (site == null) 33 | { 34 | return 'Cannot get information on this domain'; 35 | } 36 | 37 | if ((site.info.status == siteModels.SiteStatus.Processing) || (site.info.status == siteModels.SiteStatus.Added)) 38 | { 39 | return "Processing: " + Math.floor(site.info.progress) + "%"; 40 | } 41 | 42 | return null; 43 | } 44 | 45 | function getErrorByHttpStatus(httpStatus) { 46 | if (httpStatus == null) { 47 | return "Cannot retrieve the page"; 48 | } 49 | 50 | if (httpStatus < 400) { 51 | return null; 52 | } 53 | 54 | switch (httpStatus) { 55 | case 400: 56 | return '400 Bad Request'; 57 | case 401: 58 | return '401 Unauthorized'; 59 | case 402: 60 | return '402 Payment Required'; 61 | case 403: 62 | 
return '403 Forbidden'; 63 | case 404: 64 | return '404 Not Found'; 65 | case 405: 66 | return '405 Method Not Allowed'; 67 | case 406: 68 | return '406 Not Acceptable'; 69 | case 407: 70 | return '407 Proxy Authentication Required'; 71 | case 408: 72 | return '408 Request Timeout'; 73 | case 409: 74 | return '409 Conflict'; 75 | case 410: 76 | return '410 Gone'; 77 | case 411: 78 | return '411 Length Required'; 79 | case 412: 80 | return '412 Precondition Failed'; 81 | case 413: 82 | return '413 Request Entity Too Large'; 83 | case 414: 84 | return '414 Request-URI Too Long'; 85 | case 415: 86 | return '415 Unsupported Media Type'; 87 | case 416: 88 | return '416 Requested Range Not Satisfiable'; 89 | case 417: 90 | return '417 Expectation Failed'; 91 | case 500: 92 | return '500 Internal Server Error'; 93 | case 501: 94 | return '501 Not Implemented'; 95 | case 502: 96 | return '502 Bad Gateway'; 97 | case 503: 98 | return '503 Service Unavailable'; 99 | case 504: 100 | return '504 Gateway Timeout'; 101 | case 505: 102 | return '505 HTTP Version Not Supported'; 103 | case 511: 104 | return '511 Network Authentication Required'; 105 | default: 106 | return 'Unknown error'; 107 | } 108 | } 109 | 110 | function init(site) { 111 | // Domain & status 112 | self.domain = site.info.domain; 113 | self.status = getSiteStatus(site) || site.info.statusDescription; 114 | self.processing = (site.info.status == siteModels.SiteStatus.Processing) || (site.info.status == siteModels.SiteStatus.Added); 115 | if (site.info.statusTime) { 116 | self.contentsTimeStamp = site.info.statusTime.getTime(); 117 | } 118 | 119 | // Nodes & links 120 | if (!site.contents || !site.contents.pages) { 121 | site.contents = new siteModels.SiteContents(); 122 | } 123 | 124 | if (site.contents.pages.length == 0) { 125 | // Error with processing, no pages available 126 | if (!self.processing) { 127 | return; 128 | } 129 | 130 | var page = new siteModels.Page(); 131 | page.id = 0; 132 | page.url = 
'http://' + self.domain + '/'; 133 | page.status = siteModels.PageStatus.Processed; 134 | page.httpStatus = 0; 135 | page.distanceFromRoot = 0; 136 | 137 | site.contents.pages.push(page); 138 | } 139 | 140 | var pages = []; 141 | 142 | // Preparing top pages to show 143 | var level = 0; 144 | do 145 | { 146 | var currentLevelPages = []; 147 | 148 | site.contents.pages.forEach(page => { 149 | if ((page.distanceFromRoot == level) || 150 | ((level > 20) && ((page.distanceFromRoot > 20) || (page.distanceFromRoot < 0)))) { 151 | currentLevelPages.push(page); 152 | } 153 | }); 154 | 155 | if (pages.length + currentLevelPages.length <= maxNodesToShow) { 156 | Array.prototype.push.apply(pages, currentLevelPages); 157 | } 158 | else { 159 | if (pages.length < maxNodesToShow / 2) 160 | { 161 | var i = 0; 162 | while (pages.length < maxNodesToShow) 163 | { 164 | pages.push(currentLevelPages[i]); 165 | i++; 166 | } 167 | } 168 | 169 | break; 170 | } 171 | 172 | level++; 173 | } 174 | while (pages.length < site.contents.pages.length); 175 | 176 | // Preparing nodes 177 | var pageIndexes = new Map(); 178 | var pageGroupes = new Map(); 179 | var index = 0; 180 | var groupCount = 0; 181 | 182 | pages.forEach(page => { 183 | var path = getFolder(page.url); 184 | 185 | var group = pageGroupes.get(path); 186 | if (group === undefined) 187 | { 188 | group = groupCount; 189 | pageGroupes.set(path, group); 190 | groupCount++; 191 | } 192 | 193 | var errorInfo = getErrorByHttpStatus(page.httpStatus); 194 | 195 | if (errorInfo) { 196 | self.nodes.push({ 197 | title: page.title, 198 | url: page.url, 199 | group: group, 200 | error: errorInfo 201 | }); 202 | } 203 | else { 204 | self.nodes.push({ 205 | title: page.title, 206 | url: page.url, 207 | group: group 208 | }); 209 | } 210 | 211 | pageIndexes.set(page.id, index); 212 | index++; 213 | }); 214 | 215 | // Links 216 | site.contents.pages.forEach(page => { 217 | page.linksTo.forEach(endPageId => { 218 | var startNodeIndex = 
pageIndexes.get(page.id); 219 | var endNodeIndex = pageIndexes.get(endPageId); 220 | 221 | if ((startNodeIndex != null) && (endNodeIndex != null) && (startNodeIndex != endNodeIndex)) { 222 | self.links.push({ 223 | source: startNodeIndex, 224 | target: endNodeIndex 225 | }); 226 | } 227 | }); 228 | }); 229 | 230 | // Finishing 231 | if ((self.status == null) && (pages.length < site.contents.pages.length)) { 232 | self.status = 'Top ' + pages.length + ' pages are shown'; 233 | } 234 | } 235 | } 236 | 237 | exports.SiteMapDataModel = SiteMapDataModel; -------------------------------------------------------------------------------- /web/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * Module dependencies. 5 | */ 6 | 7 | var app = require('./app'); 8 | var debug = require('debug')('express-template:server'); 9 | var http = require('http'); 10 | var config = require('../config/index'); 11 | 12 | /** 13 | * Get port from environment and store in Express. 14 | */ 15 | 16 | var port = normalizePort(config.get('web:port') || '3000'); 17 | app.set('port', port); 18 | 19 | /** 20 | * Create HTTP server. 21 | */ 22 | 23 | var server = http.createServer(app); 24 | 25 | /** 26 | * Listen on provided port, on all network interfaces. 27 | */ 28 | 29 | server.listen(port); 30 | server.on('error', onError); 31 | server.on('listening', onListening); 32 | 33 | /** 34 | * Normalize a port into a number, string, or false. 35 | */ 36 | 37 | function normalizePort(val) { 38 | var port = parseInt(val, 10); 39 | 40 | if (isNaN(port)) { 41 | // named pipe 42 | return val; 43 | } 44 | 45 | if (port >= 0) { 46 | // port number 47 | return port; 48 | } 49 | 50 | return false; 51 | } 52 | 53 | /** 54 | * Event listener for HTTP server "error" event. 55 | */ 56 | 57 | function onError(error) { 58 | if (error.syscall !== 'listen') { 59 | throw error; 60 | } 61 | 62 | var bind = typeof port === 'string' 63 | ? 
'Pipe ' + port 64 | : 'Port ' + port; 65 | 66 | // handle specific listen errors with friendly messages 67 | switch (error.code) { 68 | case 'EACCES': 69 | console.error(bind + ' requires elevated privileges'); 70 | process.exit(1); 71 | break; 72 | case 'EADDRINUSE': 73 | console.error(bind + ' is already in use'); 74 | process.exit(1); 75 | break; 76 | default: 77 | throw error; 78 | } 79 | } 80 | 81 | /** 82 | * Event listener for HTTP server "listening" event. 83 | */ 84 | 85 | function onListening() { 86 | var addr = server.address(); 87 | var bind = typeof addr === 'string' 88 | ? 'pipe ' + addr 89 | : 'port ' + addr.port; 90 | debug('Listening on ' + bind); 91 | } 92 | -------------------------------------------------------------------------------- /web/public/css/non-responsive.css: -------------------------------------------------------------------------------- 1 | /* Template-specific stuff 2 | * 3 | * Customizations just for the template; these are not necessary for anything 4 | * with disabling the responsiveness. 5 | */ 6 | 7 | /* Account for fixed navbar */ 8 | body { 9 | min-width: 970px; 10 | padding-top: 70px; 11 | padding-bottom: 30px; 12 | } 13 | 14 | /* Finesse the page header spacing */ 15 | .page-header { 16 | margin-bottom: 30px; 17 | } 18 | .page-header .lead { 19 | margin-bottom: 10px; 20 | } 21 | 22 | 23 | /* Non-responsive overrides 24 | * 25 | * Utilize the following CSS to disable the responsive-ness of the container, 26 | * grid system, and navbar. 
27 | */ 28 | 29 | /* Reset the container */ 30 | .container { 31 | width: 970px; 32 | max-width: none !important; 33 | } 34 | 35 | /* Demonstrate the grids */ 36 | .col-xs-4 { 37 | padding-top: 15px; 38 | padding-bottom: 15px; 39 | background-color: #eee; 40 | background-color: rgba(86,61,124,.15); 41 | border: 1px solid #ddd; 42 | border: 1px solid rgba(86,61,124,.2); 43 | } 44 | 45 | .container .navbar-header, 46 | .container .navbar-collapse { 47 | margin-right: 0; 48 | margin-left: 0; 49 | } 50 | 51 | /* Always float the navbar header */ 52 | .navbar-header { 53 | float: left; 54 | } 55 | 56 | /* Undo the collapsing navbar */ 57 | .navbar-collapse { 58 | display: block !important; 59 | height: auto !important; 60 | padding-bottom: 0; 61 | overflow: visible !important; 62 | } 63 | 64 | .navbar-toggle { 65 | display: none; 66 | } 67 | .navbar-collapse { 68 | border-top: 0; 69 | } 70 | 71 | .navbar-brand { 72 | margin-left: -15px; 73 | } 74 | 75 | /* Always apply the floated nav */ 76 | .navbar-nav { 77 | float: left; 78 | margin: 0; 79 | } 80 | .navbar-nav > li { 81 | float: left; 82 | } 83 | .navbar-nav > li > a { 84 | padding: 15px; 85 | } 86 | 87 | /* Redeclare since we override the float above */ 88 | .navbar-nav.navbar-right { 89 | float: right; 90 | } 91 | 92 | /* Undo custom dropdowns */ 93 | .navbar .navbar-nav .open .dropdown-menu { 94 | position: absolute; 95 | float: left; 96 | background-color: #fff; 97 | border: 1px solid #ccc; 98 | border: 1px solid rgba(0, 0, 0, .15); 99 | border-width: 0 1px 1px; 100 | border-radius: 0 0 4px 4px; 101 | -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, .175); 102 | box-shadow: 0 6px 12px rgba(0, 0, 0, .175); 103 | } 104 | .navbar-default .navbar-nav .open .dropdown-menu > li > a { 105 | color: #333; 106 | } 107 | .navbar .navbar-nav .open .dropdown-menu > li > a:hover, 108 | .navbar .navbar-nav .open .dropdown-menu > li > a:focus, 109 | .navbar .navbar-nav .open .dropdown-menu > .active > a, 110 | .navbar .navbar-nav 
.open .dropdown-menu > .active > a:hover, 111 | .navbar .navbar-nav .open .dropdown-menu > .active > a:focus { 112 | color: #fff !important; 113 | background-color: #428bca !important; 114 | } 115 | .navbar .navbar-nav .open .dropdown-menu > .disabled > a, 116 | .navbar .navbar-nav .open .dropdown-menu > .disabled > a:hover, 117 | .navbar .navbar-nav .open .dropdown-menu > .disabled > a:focus { 118 | color: #999 !important; 119 | background-color: transparent !important; 120 | } 121 | -------------------------------------------------------------------------------- /web/public/css/site.css: -------------------------------------------------------------------------------- 1 | body { 2 | padding-top: 50px; 3 | padding-bottom: 20px; 4 | } 5 | 6 | /* Set padding to keep content from hitting the edges */ 7 | .body-content { 8 | padding-left: 15px; 9 | padding-right: 15px; 10 | } 11 | 12 | /* Set width on the form input elements since they're 100% wide by default */ 13 | input, 14 | select, 15 | textarea { 16 | max-width: 280px; 17 | } 18 | 19 | /* styles for validation helpers */ 20 | .field-validation-error { 21 | color: #b94a48; 22 | } 23 | 24 | .field-validation-valid { 25 | display: none; 26 | } 27 | 28 | input.input-validation-error { 29 | border: 1px solid #b94a48; 30 | } 31 | 32 | input[type="checkbox"].input-validation-error { 33 | border: 0 none; 34 | } 35 | 36 | .validation-summary-errors { 37 | color: #b94a48; 38 | } 39 | 40 | .validation-summary-valid { 41 | display: none; 42 | } 43 | 44 | /* Added for Site Mapper */ 45 | .body-content { 46 | min-height: 500px; 47 | } 48 | 49 | .center { 50 | text-align: center; 51 | } 52 | 53 | .address-form-wrapper { 54 | margin-top: 150px; 55 | margin-bottom: 150px; 56 | min-width: 500px; 57 | } 58 | 59 | .address-form { 60 | width: 600px; 61 | display: inline-block; 62 | } 63 | 64 | .address-form .control-label { 65 | margin-right: 5px; 66 | } 67 | 68 | .address-form .form-control { 69 | vertical-align: middle !important; 
70 | } 71 | 72 | .address-form .address-edit { 73 | width: 100%; 74 | } 75 | 76 | .form-inline .form-control { 77 | display: inline-block; 78 | } 79 | 80 | .navbar-form .control-label { 81 | margin-right: 5px; 82 | } 83 | 84 | .navbar-form { 85 | padding-right: 0; 86 | } 87 | 88 | .navbar-collapse { 89 | padding-right: 0; 90 | } 91 | 92 | .top-address-edit { 93 | width: 200px; 94 | } 95 | 96 | .map-status-div h5 { 97 | margin-bottom: 5px; 98 | } 99 | 100 | .progress-image { 101 | width: 32px; 102 | height: 11px; 103 | margin-bottom: 2px; 104 | margin-right: 2px; 105 | display: none; 106 | } 107 | 108 | .title-container { 109 | position: relative; 110 | } 111 | 112 | .map-options-div { 113 | position: absolute; 114 | top: 5px; 115 | right: 0; 116 | } 117 | 118 | .map-status-text { 119 | visibility: hidden; 120 | } 121 | 122 | .dropdown-menu > .disabled > a:hover, 123 | .dropdown-menu > .disabled > a:focus { 124 | text-decoration: none; 125 | cursor: default; 126 | } 127 | 128 | @media (max-width: 1300px) { 129 | .dropdown-menu-respalign { 130 | left: auto; 131 | right: 0px; 132 | } 133 | .dropdown-menu-respalign::before { 134 | left: auto !important; 135 | right: 9px; 136 | } 137 | .dropdown-menu-respalign::after { 138 | left: auto !important; 139 | right: 10px; 140 | } 141 | } 142 | 143 | .site-table { 144 | border-bottom: 1px solid #dddddd; 145 | } 146 | 147 | .code { 148 | font-family: monospace; 149 | } 150 | 151 | .sample-map { 152 | border: 1px solid #dddddd; 153 | margin-left: 5px; 154 | margin-right: 5px; 155 | width: 270px; 156 | height: 197px; 157 | -webkit-box-sizing: content-box; 158 | -moz-box-sizing: content-box; 159 | box-sizing: content-box; 160 | } 161 | 162 | .sample-map-panel { 163 | margin-top: 20px; 164 | } 165 | 166 | .sample-map-wrapper { 167 | display: inline-block; 168 | } 169 | 170 | .wle-link-panel { 171 | margin-top: 50px; 172 | } 173 | 174 | /* Map */ 175 | .site-map-box { 176 | height: 630px; 177 | width: 1000px; 178 | margin: 0px auto 
0px auto; 179 | } 180 | 181 | .site-map { 182 | width: 1000px; 183 | height: 600px; 184 | margin: 0 auto; 185 | } 186 | 187 | .site-map-loading { 188 | margin: 0 auto; 189 | width: 1000px; 190 | height: 600px; 191 | display: table-cell; 192 | font-size: 18px; 193 | font-weight: bold; 194 | text-align: center; 195 | vertical-align: middle; 196 | } -------------------------------------------------------------------------------- /web/public/images/map_alentum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alentum/sitemapper-nodejs/9f7d23a0b37a29abe62fd29ef63e7d36c7bfc6ba/web/public/images/map_alentum.png -------------------------------------------------------------------------------- /web/public/images/map_weblogexpert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alentum/sitemapper-nodejs/9f7d23a0b37a29abe62fd29ef63e7d36c7bfc6ba/web/public/images/map_weblogexpert.png -------------------------------------------------------------------------------- /web/public/images/progress.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alentum/sitemapper-nodejs/9f7d23a0b37a29abe62fd29ef63e7d36c7bfc6ba/web/public/images/progress.gif -------------------------------------------------------------------------------- /web/public/js/main.js: -------------------------------------------------------------------------------- 1 | function validateUrl(value) { 2 | return /^(http:\/\/|https:\/\/)?(([a-zA-Z0-9]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-zA-Z0-9-]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*\.)+([a-zA-Z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]){2,6}\/?$/.test(value); 3 | } 4 | 5 | function setValidationForAddressEdit(id, bottom) { 6 | var timeout; 7 | 8 | $('#' + id).on('input', function () { 9 | if (timeout) { 10 | $('#' + id).popover('hide'); 11 | 12 | 
clearTimeout(timeout); 13 | timeout = null; 14 | } 15 | }); 16 | 17 | $('#' + id).parents('form:first').submit(function () { 18 | var st = $('#' + id).val(); 19 | st = st.replace(/^\s+|\s+$/g, ''); 20 | 21 | var error = null; 22 | if (!st) 23 | error = 'Please enter address'; 24 | else if (!validateUrl(st)) 25 | error = 'Invalid address'; 26 | 27 | if (timeout) { 28 | clearTimeout(timeout); 29 | timeout = null; 30 | } 31 | 32 | var _popover = $('#' + id).popover({ 33 | trigger: 'manual', 34 | placement: bottom ? 'bottom' : 'top', 35 | content: error, 36 | template: '

' 37 | }); 38 | 39 | $('#' + id).data('bs.popover').options.content = error; 40 | 41 | if (!error) { 42 | $('#' + id).popover('hide'); 43 | return true; 44 | } 45 | 46 | $('#' + id).popover('show'); 47 | timeout = setTimeout(function () { 48 | $('#' + id).popover('hide'); 49 | timeout = null; 50 | }, 5000); 51 | 52 | return false; 53 | }); 54 | } 55 | 56 | function updateDomainHistory(domain) 57 | { 58 | // Changing list of domains 59 | var history = Cookies.get('history'); 60 | var domains = []; 61 | if (history) 62 | domains = history.split(' '); 63 | 64 | if (domain) { 65 | var i = $.inArray(domain, domains); 66 | if (i != -1) 67 | domains.splice(i, 1); 68 | domains.splice(0, 0, domain); 69 | } 70 | 71 | if (domains.length > 7) 72 | domains.splice(7, domains.length - 7); 73 | 74 | Cookies.set('history', domains.join(' '), { expires: 90, path: '/' }); 75 | 76 | // Updating UI 77 | var historyMenu = $('#domainHistory'); 78 | historyMenu.empty(); 79 | 80 | $.each(domains, function (index, value) { 81 | historyMenu.append($('<li>').append($('<a>').attr('href', '/map/' + value).text(value))); 82 | }); 83 | 84 | if (domains.length == 0) 85 | historyMenu.append($('<li>').addClass('disabled').append($('<a>').attr('href', '#').text('No history'))); 86 | 87 | $('li.disabled a').click(function (event) { 88 | event.preventDefault(); 89 | }); 90 | } -------------------------------------------------------------------------------- /web/public/js/map.js: -------------------------------------------------------------------------------- 1 | function zoomToNodes(force) { 2 | var svg = d3.select('#siteMap'); 3 | var nodes = svg.selectAll('circle'); 4 | 5 | var minX = null; 6 | var maxX = null; 7 | var minY = null; 8 | var maxY = null; 9 | 10 | nodes.each(function (node) { 11 | if ((minX == null) || (minX > node.x)) 12 | minX = node.x; 13 | if ((maxX == null) || (maxX < node.x)) 14 | maxX = node.x; 15 | if ((minY == null) || (minY > node.y)) 16 | minY = node.y; 17 | if ((maxY == null) || (maxY < node.y)) 18 | maxY = node.y; 19 | }); 20 | 21 | var width = $('#siteMap').width(), 22 | height = $('#siteMap').height(); 23 | 24 | if ((minX == maxX) && (minY == maxY)) { 25 | nodes.each(function (node) { 26 | node.px = node.x = width / 2; 27 | node.py = node.y = height / 2; 28 | node.fixed = true; 29 | }); 30 | return; 31 | } 32 | 33 | if ((minX == null) || (maxX == null) || (minX == maxX)) 34 | return; 35 | if ((minY == null) || (maxY == null) || (minY == maxY)) 36 | return; 37 | 38 | var rotate = maxX - minX < maxY - minY; 39 | var padding = 40; 40 | 41 | if (!rotate) { 42 | var zoom = Math.min((width - padding) / (maxX - minX), (height - padding) / (maxY - minY)); 43 | 44 | var left = (width - (maxX - minX) * zoom) / 2; 45 | var top = (height - (maxY - minY) * zoom) / 2; 46 | 47 | nodes.each(function (node) { 48 | node.px = node.x = left + (node.x - minX) * zoom; 49 | node.py = node.y = top + (node.y - minY) * zoom; 50 | node.fixed = true; 51 | }); 52 | } 53 | else { 54 | var zoom = Math.min((width - padding) / (maxY - minY), (height - padding) / (maxX - minX)); 55 | 56 | var left = (width - (maxY - minY) * zoom) / 2; 57 | var top = (height - 
(maxX - minX) * zoom) / 2; 58 | 59 | nodes.each(function (node) { 60 | var x = node.x; 61 | node.px = node.x = left + (node.y - minY) * zoom; 62 | node.py = node.y = top + (x - minX) * zoom; 63 | node.fixed = true; 64 | }); 65 | } 66 | 67 | force.tick(); 68 | } 69 | 70 | function updateMapSize() { 71 | var deltaX = $('#siteMapBox').width() - $('#siteMap').width(); 72 | var deltaY = $('#siteMapBox').height() - $('#siteMap').height(); 73 | var width = Math.max(800, $(window).width() * 0.8); 74 | var height = Math.max(400, $(window).height() - 280); 75 | $('#siteMapBox') 76 | .width(width + deltaX) 77 | .height(height + deltaY); 78 | $('#siteMapLoading, #siteMap') 79 | .width(width) 80 | .height(height); 81 | 82 | d3.select('#siteMap') 83 | .attr('viewBox', '0 0 ' + width + ' ' + height) 84 | } 85 | 86 | function initSiteMap(jsonUrl) { 87 | var lastTimeStamp = null; 88 | var numberOfRefreshes = 0; 89 | var maxNumberOfRefreshes = 30; 90 | 91 | $('.btn').button(); 92 | var mode = Cookies.get('highlightLinkMode'); 93 | if (mode == 1) 94 | $('#highlightOutgoingLabel').button('toggle'); 95 | else if (mode == 2) 96 | $('#highlightIncomingLabel').button('toggle'); 97 | else 98 | $('#highlightAllLabel').button('toggle'); 99 | 100 | $('#linkHighlightOptions label').on('click', function () { 101 | var label = $(this); 102 | var option; 103 | switch (label.attr('id')) 104 | { 105 | case 'highlightOutgoingLabel': 106 | option = 1; 107 | break; 108 | case 'highlightIncomingLabel': 109 | option = 2; 110 | break; 111 | default: 112 | option = 0; 113 | } 114 | 115 | Cookies.set('highlightLinkMode', option, { path: '/' }); 116 | }); 117 | 118 | initSiteMapInternal(); 119 | 120 | function initSiteMapInternal() 121 | { 122 | // Hide all tipsy tooltips 123 | $('.tipsy').remove(); 124 | 125 | // Set map size 126 | updateMapSize(); 127 | var width = $('#siteMap').width(); 128 | var height = $('#siteMap').height(); 129 | var logicalWidth = 1200; 130 | var logicalHeight = 600; 131 | 132 | 
var url = jsonUrl; 133 | if (lastTimeStamp) 134 | { 135 | url += '?contentsTimeStamp=' + lastTimeStamp; 136 | } 137 | 138 | function showStatus(text) { 139 | $('#statusText').text(text || 'Status'); 140 | $('#statusText').css('visibility', text ? 'visible' : 'hidden') 141 | 142 | if (text && (text.indexOf('%') != -1)) 143 | { 144 | $('#progress').show(); 145 | } 146 | else 147 | { 148 | $('#progress').hide(); 149 | } 150 | } 151 | 152 | d3.json(url, function (error, json) { 153 | 154 | // Use seeded random instead of random to produce the same graph every time 155 | Math.seedrandom('myrandom'); 156 | 157 | // Problem with server 158 | if (!json) 159 | { 160 | if (numberOfRefreshes == 0) 161 | { 162 | $('#siteMapLoading').text('Cannot load map'); 163 | $('#siteMap').hide(); 164 | $('#siteMapLoading').show(); 165 | } 166 | else 167 | { 168 | showStatus('Please refresh the page to update the status'); 169 | } 170 | 171 | return; 172 | } 173 | 174 | lastTimeStamp = json.contentsTimeStamp; 175 | 176 | // Updating status 177 | numberOfRefreshes++; 178 | if (numberOfRefreshes > maxNumberOfRefreshes) 179 | { 180 | showStatus('Please refresh the page to update the status'); 181 | } 182 | else 183 | { 184 | showStatus(json.status); 185 | } 186 | 187 | // Status hasn't changed 188 | if (!json.nodes || !json.links || (json.nodes.length == 0)) 189 | { 190 | if (json.domain) // Json is correct - wait for updates 191 | { 192 | if (json.processing) { 193 | setTimeout(function () { 194 | initSiteMapInternal(); 195 | }, (numberOfRefreshes > 2 ? 
10000 : 5000)); 196 | } 197 | else { 198 | $('#siteMapLoading').text(json.status || 'Cannot get map for this site'); 199 | $('#siteMap').hide(); 200 | $('#siteMapLoading').show(); 201 | showStatus(); 202 | } 203 | } 204 | else // Some problem - json is incorrect 205 | { 206 | showStatus('Please refresh the page to update the status'); 207 | } 208 | 209 | return; 210 | } 211 | 212 | // Clear svg contents 213 | d3.select('#siteMap').text(''); 214 | 215 | var svg = d3.select('#siteMap') 216 | .attr('viewBox', '0 0 ' + width + ' ' + height) 217 | .attr('preserveAspectRatio', 'xMidYMid meet') 218 | .attr('pointer-events', 'all') 219 | .call(d3.behavior.zoom().on('zoom', zoomMap)); 220 | 221 | var vis = svg 222 | .append('svg:g'); 223 | 224 | function zoomMap() { 225 | vis.attr('transform', 226 | 'translate(' + d3.event.translate + ')' 227 | + ' scale(' + d3.event.scale + ')'); 228 | svg.selectAll('circle') 229 | .attr('r', function (d) { return ((d.index == 0) ? 15 : 10) / d3.event.scale; }) 230 | .each(setNodeStyleWithScale); 231 | 232 | var width = 1 / d3.event.scale; 233 | svg.selectAll('.link') 234 | .style('stroke-width', width + 'px'); 235 | } 236 | 237 | var colorByGroup = d3.scale.category20(); 238 | 239 | function setNodeStyleWithScale(d) { 240 | var node = d3.select(this); 241 | node.style('fill', colorByGroup(d.group)); 242 | if (d.error) { 243 | node.style({ 244 | 'stroke': 'red', 245 | 'stroke-width': 1.5 / d3.event.scale + 'px', 246 | 'stroke-dasharray': (5 / d3.event.scale) + ', ' + (5 / d3.event.scale) 247 | }); 248 | } 249 | else { 250 | node.style({ 251 | 'stroke-width': 0.5 / d3.event.scale + 'px', 252 | }); 253 | } 254 | } 255 | 256 | function setNodeStyle(d) { 257 | var node = d3.select(this); 258 | node.style('fill', colorByGroup(d.group)); 259 | if (d.error) { 260 | node.style({ 261 | 'stroke': 'red', 262 | 'stroke-width': '1.5px', 263 | 'stroke-dasharray': '5, 5' 264 | }); 265 | } 266 | } 267 | 268 | var force = d3.layout.force() 269 | 
.gravity(.05) 270 | .distance(100) 271 | .charge(-100) 272 | .size([logicalWidth, logicalHeight]); 273 | 274 | // Updating map 275 | var nodeCount = json.nodes.length; 276 | if (nodeCount != 0) 277 | { 278 | json.nodes.forEach(function (d, i) { 279 | d.x = logicalWidth / nodeCount * i; 280 | d.y = logicalHeight / 2 + Math.random() - 0.5; 281 | }); 282 | } 283 | 284 | force 285 | .nodes(json.nodes) 286 | .links(json.links) 287 | .start(); 288 | 289 | var link = vis.selectAll('.link') 290 | .data(json.links) 291 | .enter().append('line') 292 | .attr('class', 'link'); 293 | 294 | var nodes = vis.selectAll('.node') 295 | .data(json.nodes) 296 | .enter() 297 | .append('circle') 298 | .attr('class', 'node') 299 | .attr('r', function (d) { return (d.index == 0) ? 15 : 10; }) 300 | .each(setNodeStyle) 301 | .on('mouseover', highlightLinks(true)) 302 | .on('mouseout', highlightLinks(false)) 303 | .on('click', function (d) { 304 | window.open(d.url, '_blank'); 305 | }); 306 | 307 | $('svg circle').tipsy({ 308 | gravity: 'w', 309 | html: true, 310 | title: function () { 311 | var d = this.__data__; 312 | return (d.title ? $('
    ').text(d.title).html() + '
    ' : '') + 313 | $('
    ').text(decodeURI(d.url)).html() + 314 | (d.error ? '
    Error: ' + d.error : ''); 315 | } 316 | }); 317 | 318 | // Rewind to end 319 | if (true) { 320 | var k = 0; 321 | while ((force.alpha() > 1e-2) && (k < 300)) { 322 | force.tick(), 323 | k = k + 1; 324 | } 325 | 326 | force.stop(); 327 | } 328 | 329 | function updateNodes() { 330 | link.attr('x1', function (d) { return d.source.x; }) 331 | .attr('y1', function (d) { return d.source.y; }) 332 | .attr('x2', function (d) { return d.target.x; }) 333 | .attr('y2', function (d) { return d.target.y; }); 334 | 335 | nodes.attr('transform', function (d) { return 'translate(' + d.x + ',' + d.y + ')'; }); 336 | } 337 | 338 | function highlightLinks(highlight) { 339 | return function (d) { 340 | if (highlight) { 341 | var label = $('#linkHighlightOptions label.active'); 342 | var highlightIncoming = (label.attr('id') == 'highlightAllLabel') || (label.attr('id') == 'highlightIncomingLabel'); 343 | var highlightOutgoing = (label.attr('id') == 'highlightAllLabel') || (label.attr('id') == 'highlightOutgoingLabel'); 344 | 345 | link.style({ 346 | 'stroke': function (o) { 347 | return ((o.source === d) && highlightOutgoing) || ((o.target === d) && highlightIncoming) ? '#888888' : '#CCCCCC'; 348 | }, 349 | 'stroke-opacity': function (o) { 350 | return ((o.source === d) && highlightOutgoing) || ((o.target === d) && highlightIncoming) ? 1 : 0.1; 351 | } 352 | }); 353 | } 354 | else { 355 | link.style({ 'stroke': null, 'stroke-opacity': null }); 356 | } 357 | }; 358 | } 359 | 360 | zoomToNodes(force); 361 | updateNodes(); 362 | 363 | $('#siteMapLoading').hide(); 364 | $('#siteMap').show(); 365 | 366 | if (json.processing && (numberOfRefreshes <= maxNumberOfRefreshes)) 367 | { 368 | setTimeout(function () { 369 | initSiteMapInternal(); 370 | }, (numberOfRefreshes > 2 ? 
10000 : 5000)); 371 | } 372 | 373 | $(window).resize(function () { 374 | updateMapSize(); 375 | zoomToNodes(force); 376 | updateNodes(); 377 | }); 378 | }); 379 | } 380 | } -------------------------------------------------------------------------------- /web/public/libs/tipsy/jquery.tipsy.js: -------------------------------------------------------------------------------- 1 | // tipsy, facebook style tooltips for jquery 2 | // version 1.0.0a 3 | // (c) 2008-2010 jason frame [jason@onehackoranother.com] 4 | // released under the MIT license 5 | 6 | (function($) { 7 | 8 | function maybeCall(thing, ctx) { 9 | return (typeof thing == 'function') ? (thing.call(ctx)) : thing; 10 | }; 11 | 12 | function isElementInDOM(ele) { 13 | while (ele = ele.parentNode) { 14 | if (ele == document) return true; 15 | } 16 | return false; 17 | }; 18 | 19 | function Tipsy(element, options) { 20 | this.$element = $(element); 21 | this.options = options; 22 | this.enabled = true; 23 | this.fixTitle(); 24 | }; 25 | 26 | Tipsy.prototype = { 27 | show: function() { 28 | var title = this.getTitle(); 29 | if (title && this.enabled) { 30 | var $tip = this.tip(); 31 | 32 | $tip.find('.tipsy-inner')[this.options.html ? 
'html' : 'text'](title); 33 | $tip[0].className = 'tipsy'; // reset classname in case of dynamic gravity 34 | $tip.remove().css({top: 0, left: 0, visibility: 'hidden', display: 'block'}).prependTo(document.body); 35 | 36 | var pos; 37 | try { 38 | pos = $.extend({}, this.$element.offset(), { 39 | // getBoundingClientRect works correctly for transformed SVG elements 40 | width: this.$element[0].getBoundingClientRect().width, 41 | height: this.$element[0].getBoundingClientRect().height 42 | }); 43 | } 44 | catch (TypeError) { 45 | pos = $.extend({}, this.$element.offset(), { 46 | width: this.$element[0].offsetWidth, 47 | height: this.$element[0].offsetHeight 48 | }); 49 | } 50 | 51 | var actualWidth = $tip[0].offsetWidth, 52 | actualHeight = $tip[0].offsetHeight, 53 | gravity = maybeCall(this.options.gravity, this.$element[0]); 54 | 55 | var tp; 56 | switch (gravity.charAt(0)) { 57 | case 'n': 58 | tp = {top: pos.top + pos.height + this.options.offset, left: pos.left + pos.width / 2 - actualWidth / 2}; 59 | break; 60 | case 's': 61 | tp = {top: pos.top - actualHeight - this.options.offset, left: pos.left + pos.width / 2 - actualWidth / 2}; 62 | break; 63 | case 'e': 64 | tp = {top: pos.top + pos.height / 2 - actualHeight / 2, left: pos.left - actualWidth - this.options.offset}; 65 | break; 66 | case 'w': 67 | tp = {top: pos.top + pos.height / 2 - actualHeight / 2, left: pos.left + pos.width + this.options.offset}; 68 | break; 69 | } 70 | 71 | if (gravity.length == 2) { 72 | if (gravity.charAt(1) == 'w') { 73 | tp.left = pos.left + pos.width / 2 - 15; 74 | } else { 75 | tp.left = pos.left + pos.width / 2 - actualWidth + 15; 76 | } 77 | } 78 | 79 | $tip.css(tp).addClass('tipsy-' + gravity); 80 | $tip.find('.tipsy-arrow')[0].className = 'tipsy-arrow tipsy-arrow-' + gravity.charAt(0); 81 | if (this.options.className) { 82 | $tip.addClass(maybeCall(this.options.className, this.$element[0])); 83 | } 84 | 85 | if (this.options.fade) { 86 | $tip.stop().css({opacity: 0, 
display: 'block', visibility: 'visible'}).animate({opacity: this.options.opacity}); 87 | } else { 88 | $tip.css({visibility: 'visible', opacity: this.options.opacity}); 89 | } 90 | } 91 | }, 92 | 93 | hide: function() { 94 | if (this.options.fade) { 95 | this.tip().stop().fadeOut(function() { $(this).remove(); }); 96 | } else { 97 | this.tip().remove(); 98 | } 99 | }, 100 | 101 | fixTitle: function() { 102 | var $e = this.$element; 103 | if ($e.attr('title') || typeof($e.attr('original-title')) != 'string') { 104 | $e.attr('original-title', $e.attr('title') || '').removeAttr('title'); 105 | } 106 | }, 107 | 108 | getTitle: function() { 109 | var title, $e = this.$element, o = this.options; 110 | this.fixTitle(); 111 | var title, o = this.options; 112 | if (typeof o.title == 'string') { 113 | title = $e.attr(o.title == 'title' ? 'original-title' : o.title); 114 | } else if (typeof o.title == 'function') { 115 | title = o.title.call($e[0]); 116 | } 117 | title = ('' + title).replace(/(^\s*|\s*$)/, ""); 118 | return title || o.fallback; 119 | }, 120 | 121 | tip: function() { 122 | if (!this.$tip) { 123 | this.$tip = $('
    ').html('
    '); 124 | this.$tip.data('tipsy-pointee', this.$element[0]); 125 | } 126 | return this.$tip; 127 | }, 128 | 129 | validate: function() { 130 | if (!this.$element[0].parentNode) { 131 | this.hide(); 132 | this.$element = null; 133 | this.options = null; 134 | } 135 | }, 136 | 137 | enable: function() { this.enabled = true; }, 138 | disable: function() { this.enabled = false; }, 139 | toggleEnabled: function() { this.enabled = !this.enabled; } 140 | }; 141 | 142 | $.fn.tipsy = function(options) { 143 | 144 | if (options === true) { 145 | return this.data('tipsy'); 146 | } else if (typeof options == 'string') { 147 | var tipsy = this.data('tipsy'); 148 | if (tipsy) tipsy[options](); 149 | return this; 150 | } 151 | 152 | options = $.extend({}, $.fn.tipsy.defaults, options); 153 | 154 | function get(ele) { 155 | var tipsy = $.data(ele, 'tipsy'); 156 | if (!tipsy) { 157 | tipsy = new Tipsy(ele, $.fn.tipsy.elementOptions(ele, options)); 158 | $.data(ele, 'tipsy', tipsy); 159 | } 160 | return tipsy; 161 | } 162 | 163 | function enter() { 164 | var tipsy = get(this); 165 | tipsy.hoverState = 'in'; 166 | if (options.delayIn == 0) { 167 | tipsy.show(); 168 | } else { 169 | tipsy.fixTitle(); 170 | setTimeout(function() { if (tipsy.hoverState == 'in') tipsy.show(); }, options.delayIn); 171 | } 172 | }; 173 | 174 | function leave() { 175 | var tipsy = get(this); 176 | tipsy.hoverState = 'out'; 177 | if (options.delayOut == 0) { 178 | tipsy.hide(); 179 | } else { 180 | setTimeout(function() { if (tipsy.hoverState == 'out') tipsy.hide(); }, options.delayOut); 181 | } 182 | }; 183 | 184 | if (!options.live) this.each(function() { get(this); }); 185 | 186 | if (options.trigger != 'manual') { 187 | var binder = options.live ? 'live' : 'bind', 188 | eventIn = options.trigger == 'hover' ? 'mouseenter' : 'focus', 189 | eventOut = options.trigger == 'hover' ? 
'mouseleave' : 'blur'; 190 | this[binder](eventIn, enter)[binder](eventOut, leave); 191 | } 192 | 193 | return this; 194 | 195 | }; 196 | 197 | $.fn.tipsy.defaults = { 198 | className: null, 199 | delayIn: 0, 200 | delayOut: 0, 201 | fade: false, 202 | fallback: '', 203 | gravity: 'n', 204 | html: false, 205 | live: false, 206 | offset: 0, 207 | opacity: 0.8, 208 | title: 'title', 209 | trigger: 'hover' 210 | }; 211 | 212 | $.fn.tipsy.revalidate = function() { 213 | $('.tipsy').each(function() { 214 | var pointee = $.data(this, 'tipsy-pointee'); 215 | if (!pointee || !isElementInDOM(pointee)) { 216 | $(this).remove(); 217 | } 218 | }); 219 | }; 220 | 221 | // Overwrite this method to provide options on a per-element basis. 222 | // For example, you could store the gravity in a 'tipsy-gravity' attribute: 223 | // return $.extend({}, options, {gravity: $(ele).attr('tipsy-gravity') || 'n' }); 224 | // (remember - do not modify 'options' in place!) 225 | $.fn.tipsy.elementOptions = function(ele, options) { 226 | return $.metadata ? $.extend({}, options, $(ele).metadata()) : options; 227 | }; 228 | 229 | $.fn.tipsy.autoNS = function() { 230 | return $(this).offset().top > ($(document).scrollTop() + $(window).height() / 2) ? 's' : 'n'; 231 | }; 232 | 233 | $.fn.tipsy.autoWE = function() { 234 | return $(this).offset().left > ($(document).scrollLeft() + $(window).width() / 2) ? 'e' : 'w'; 235 | }; 236 | 237 | /** 238 | * yields a closure of the supplied parameters, producing a function that takes 239 | * no arguments and is suitable for use as an autogravity function like so: 240 | * 241 | * @param margin (int) - distance from the viewable region edge that an 242 | * element should be before setting its tooltip's gravity to be away 243 | * from that edge. 244 | * @param prefer (string, e.g. 'n', 'sw', 'w') - the direction to prefer 245 | * if there are no viewable region edges effecting the tooltip's 246 | * gravity. 
It will try to vary from this minimally, for example, 247 | * if 'sw' is preferred and an element is near the right viewable 248 | * region edge, but not the top edge, it will set the gravity for 249 | * that element's tooltip to be 'se', preserving the southern 250 | * component. 251 | */ 252 | $.fn.tipsy.autoBounds = function(margin, prefer) { 253 | return function() { 254 | var dir = {ns: prefer[0], ew: (prefer.length > 1 ? prefer[1] : false)}, 255 | boundTop = $(document).scrollTop() + margin, 256 | boundLeft = $(document).scrollLeft() + margin, 257 | $this = $(this); 258 | 259 | if ($this.offset().top < boundTop) dir.ns = 'n'; 260 | if ($this.offset().left < boundLeft) dir.ew = 'w'; 261 | if ($(window).width() + $(document).scrollLeft() - $this.offset().left < margin) dir.ew = 'e'; 262 | if ($(window).height() + $(document).scrollTop() - $this.offset().top < margin) dir.ns = 's'; 263 | 264 | return dir.ns + (dir.ew ? dir.ew : ''); 265 | } 266 | }; 267 | 268 | })(jQuery); 269 | -------------------------------------------------------------------------------- /web/public/libs/tipsy/tipsy.css: -------------------------------------------------------------------------------- 1 | .tipsy { font-size: 11px; position: absolute; padding: 5px; z-index: 100000; } 2 | .tipsy-inner { background-color: #000; color: #FFF; max-width: 400px; padding: 5px 8px 4px 8px; text-align: center; } 3 | 4 | /* Rounded corners */ 5 | .tipsy-inner { border-radius: 3px; -moz-border-radius: 3px; -webkit-border-radius: 3px; } 6 | 7 | /* Uncomment for shadow */ 8 | /*.tipsy-inner { box-shadow: 0 0 5px #000000; -webkit-box-shadow: 0 0 5px #000000; -moz-box-shadow: 0 0 5px #000000; }*/ 9 | 10 | .tipsy-arrow { position: absolute; width: 0; height: 0; line-height: 0; border: 5px dashed #000; } 11 | 12 | /* Rules to colour arrows */ 13 | .tipsy-arrow-n { border-bottom-color: #000; } 14 | .tipsy-arrow-s { border-top-color: #000; } 15 | .tipsy-arrow-e { border-left-color: #000; } 16 | .tipsy-arrow-w { 
border-right-color: #000; } 17 | 18 | .tipsy-n .tipsy-arrow { top: 0px; left: 50%; margin-left: -5px; border-bottom-style: solid; border-top: none; border-left-color: transparent; border-right-color: transparent; } 19 | .tipsy-nw .tipsy-arrow { top: 0; left: 10px; border-bottom-style: solid; border-top: none; border-left-color: transparent; border-right-color: transparent;} 20 | .tipsy-ne .tipsy-arrow { top: 0; right: 10px; border-bottom-style: solid; border-top: none; border-left-color: transparent; border-right-color: transparent;} 21 | .tipsy-s .tipsy-arrow { bottom: 0; left: 50%; margin-left: -5px; border-top-style: solid; border-bottom: none; border-left-color: transparent; border-right-color: transparent; } 22 | .tipsy-sw .tipsy-arrow { bottom: 0; left: 10px; border-top-style: solid; border-bottom: none; border-left-color: transparent; border-right-color: transparent; } 23 | .tipsy-se .tipsy-arrow { bottom: 0; right: 10px; border-top-style: solid; border-bottom: none; border-left-color: transparent; border-right-color: transparent; } 24 | .tipsy-e .tipsy-arrow { right: 0; top: 50%; margin-top: -5px; border-left-style: solid; border-right: none; border-top-color: transparent; border-bottom-color: transparent; } 25 | .tipsy-w .tipsy-arrow { left: 0; top: 50%; margin-top: -5px; border-right-style: solid; border-left: none; border-top-color: transparent; border-bottom-color: transparent; } 26 | -------------------------------------------------------------------------------- /web/public/robots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alentum/sitemapper-nodejs/9f7d23a0b37a29abe62fd29ef63e7d36c7bfc6ba/web/public/robots.txt -------------------------------------------------------------------------------- /web/public/wle_tracker.gif: -------------------------------------------------------------------------------- 
//WebLog Expert Tracker 2.01
(function () {
    // Requests the 1x1 tracking image, passing screen size, colour depth,
    // browser language, Flash version and the visibility flag in the query
    // string. The trailing random value makes every request URL unique.
    function RequestTracker(visible) {
        var imgTracker = new Image(1, 1);
        imgTracker.src = "/wle_tracker.gif?screensize=" +
            screen.width + "x" + screen.height + "&colordepth=" + screen.colorDepth +
            "&lang=" + GetBrowserLanguage() +
            "&fl=" + GetFlashVersion() + "&v=" + (visible ? "1" : "0") + "&r=" + Math.random().toString().slice(2, 10);
    }

    // Returns the lower-cased browser language, or an empty string when the
    // browser exposes neither navigator.language nor navigator.browserLanguage
    // (the unguarded lookup used to throw there, so no beacon was sent).
    function GetBrowserLanguage() {
        var language = navigator.language || navigator.browserLanguage || "";
        return language.toLowerCase();
    }

    // Returns the Flash version as "major.minor.revision", or "0" when Flash
    // is missing or its version cannot be determined.
    function GetFlashVersion() {
        try {
            var parts;
            if (navigator.plugins && navigator.plugins.length) {
                for (var i = 0; i < navigator.plugins.length; i++)
                    if (navigator.plugins[i].name.indexOf("Shockwave Flash") != -1) {
                        parts = navigator.plugins[i].description.split("Shockwave Flash ")[1].replace(" r", ".").replace(" d", ".").replace(/\s/g, "").split(".");
                        return parts[0] + "." + parts[1] + "." + parts[2];
                    }
            }
            else if (window.ActiveXObject) {
                var flashObj = new ActiveXObject("ShockwaveFlash.ShockwaveFlash");
                if (flashObj) {
                    parts = flashObj.GetVariable("$version").split(" ")[1].split(",");
                    return parts[0] + "." + parts[1] + "." + parts[2];
                }
            }
        }
        catch (e) {
            // Best effort only: any detection failure is reported as "0" below
        }

        return "0";
    }

    var isPrerendering = false;

    // Sends the "visible" beacon once, when a prerendered page is first shown
    function handleVisibilityChange(evt) {
        if (isPrerendering && (document.webkitVisibilityState != "prerender")) {
            RequestTracker(true);
            isPrerendering = false;
        }
    }

    if (document.webkitVisibilityState != "prerender")
        RequestTracker(true);
    else {
        // Prerendered page: report it as not visible now and report again
        // when the visibility state changes
        RequestTracker(false);
        isPrerendering = true;
        document.addEventListener("webkitvisibilitychange", handleVisibilityChange, false);
    }
})();
"use strict";

var express = require('express');
var router = express.Router();
var bindingModels = require('../binding-models/binding-models');

/**
 * Middleware that sets response headers forbidding caching, so that map data
 * is re-requested from the server on every poll.
 */
function noCache(req, res, next) {
    res.header('Cache-Control', 'private, no-cache, no-store, must-revalidate');
    res.header('Expires', '-1');
    res.header('Pragma', 'no-cache');
    next();
}

// show map
router.get('/map/:domain', function (req, res, next) {
    if (!req.params.domain) {
        // Not found, going to 404 handler.
        // "return" is required: without it the page was rendered as well.
        return next();
    }

    res.render('map/index', {
        domain: req.params.domain
    });
});

// JSON data for the map; polled by the client while a site is being processed
router.get('/mapdata/:domain', noCache, function (req, res, next) {
    if (!req.params.domain) {
        // Not found, going to 404 handler
        return next();
    }

    // The client sends the time stamp of the contents it already has as a
    // query-string parameter (?contentsTimeStamp=...). The route pattern has
    // no such path parameter, so it must be read from req.query.
    // NOTE(review): the value arrives as a string - confirm that the
    // repository accepts it in that form.
    var contentsTimeStamp = req.query.contentsTimeStamp;

    req.app.get('mappingClient').getSite(req.params.domain, true, contentsTimeStamp)
        .then(site => {
            if (site == null) {
                // error 404; stop here so that no second response is sent
                return next();
            }

            var data;

            try {
                data = new bindingModels.SiteMapDataModel(site, {
                    maxNodesToShow: req.app.get('appConfig').get('mapper:maxNodesToShow')
                });
            }
            catch (ex) {
                // Reported to the client as an internal error below
                data = null;
            }

            if (data != null) {
                return res.json(data);
            }
            else {
                return res.status(500).send('Internal error');
            }
        })
        .catch(() => res.status(500).send('Internal error'));
});

module.exports = router;
var express = require('express');
var router = express.Router();
var siteModels = require('../../models/site-models');

// Home page
router.get('/', function (req, res) {
    res.render('root/index', {});
});

// Search form: redirects to the map of the submitted domain
router.post('/', function (req, res) {
    var domain = siteModels.SiteInfo.normalizeDomain(req.body.domain);
    if (domain) {
        res.redirect('/map/' + encodeURIComponent(domain));
    }
    else {
        // Nothing usable was submitted. A response must still be sent,
        // otherwise the request hangs until it times out.
        res.redirect('/');
    }
});

// About page
router.get('/about', function (req, res) {
    res.render('root/about', {});
});

// Crawler page
router.get('/crawler', function (req, res) {
    res.render('root/crawler', {});
});

module.exports = router;

    {{ title }}

    7 | 12 | {% endblock %} 13 | -------------------------------------------------------------------------------- /web/views/errors/500.html: -------------------------------------------------------------------------------- 1 | {% extends '../layouts/usual-layout.html' %} 2 | 3 | {% set title = 'Server Error' %} 4 | 5 | {% block content %} 6 |

    {{ title }}

    7 |
     8 |         {{message}}
     9 |     
    10 | 11 | {% if error.status || error.stack %} 12 |
    13 |         {{error.status}}
    14 | {{error.stack}} 15 |
    > 16 | {% endif %} 17 | {% endblock %} 18 | -------------------------------------------------------------------------------- /web/views/layouts/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Visual Site Mapper - {{title}} 7 | 8 | 9 | 10 | {% for file in bundles.css.default %} 11 | 12 | {% endfor %} 13 | 14 | {% block styles %}{% endblock %} 15 | 16 | 25 | 26 | 27 |
    68 | 69 | {% block body %}{% endblock %} 70 | 71 |
    72 |
    73 |
    74 | © {{ now() | date('Y') }} Alentum Software Ltd. 75 | 76 | {% block footer-links %} 77 | Web log analytics by WebLog Expert 78 | {% endblock %} 79 | 80 |
    81 |
    82 | 83 | 84 | {% for file in bundles.js.default %} 85 | 86 | {% endfor %} 87 | 88 | 94 | 95 | {% block scripts %}{% endblock %} 96 | 97 | 98 | -------------------------------------------------------------------------------- /web/views/layouts/usual-layout.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | {% block body %} 4 |
    5 | {% block content %}{% endblock %} 6 |
    7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /web/views/map/index.html: -------------------------------------------------------------------------------- 1 | {% extends '../layouts/layout.html' %} 2 | 3 | {% set title = domain %} 4 | {% set description = 'View a visual site map of ' + domain + ' at Visual Site Mapper - free service that can quickly show an interactive map of your site.' %} 5 | 6 | {% block styles %} 7 | 23 | 24 | {% for file in bundles.css.map %} 25 | 26 | {% endfor %} 27 | {% endblock %} 28 | 29 | {% block body %} 30 |
    31 |
    32 |
    33 | Highlight links: 34 |
    35 | 38 | 41 | 44 |
    45 |
    46 |
    47 |

    {{domain}}

    48 |
    49 |
    50 |
    51 | 52 | Status 53 |
    54 |
    55 |
    56 |
    57 | 58 |
    59 |
    Loading...
    60 | 61 | 63 |
    64 | {% endblock %} 65 | 66 | {% block footer-links %} 67 | IIS log analyzer - 68 | Apache log analyzer - 69 | Nginx log analyzer 70 | {% endblock %} 71 | 72 | {% block scripts %} 73 | {% for file in bundles.js.map %} 74 | 75 | {% endfor %} 76 | 77 | 82 | {% endblock %} -------------------------------------------------------------------------------- /web/views/root/about.html: -------------------------------------------------------------------------------- 1 | {% extends '../layouts/usual-layout.html' %} 2 | 3 | {% set title = 'About' %} 4 | 5 | {% block content %} 6 |

    {{ title }}

    7 | 8 |

    Visual Site Mapper is a free service that can quickly show a map of your site.

    9 |

    The service was created by Alentum Software Ltd., which is also the creator of 10 | the popular web log analytics tool WebLog Expert.

    11 |

    If you have questions or suggestions on this service, please send them to support@alentum.com.

    12 | {% endblock %} 13 | 14 | 15 | -------------------------------------------------------------------------------- /web/views/root/crawler.html: -------------------------------------------------------------------------------- 1 | {% extends '../layouts/usual-layout.html' %} 2 | 3 | {% set title = 'Crawler' %} 4 | 5 | {% block content %} 6 |

    Visual Site Mapper Crawler

    7 | 8 |

    Visual Site Mapper is a service that shows maps of sites by crawling site pages. Users of our service can request a map of any site.

    9 | 10 |

    If you wish to prevent the Visual Site Mapper crawler from indexing all or some pages on your site, 11 | you can do so by adding rules to the robots.txt file that disallow access to these pages for the VSMCrawler crawler.

    12 | 13 |

    For example:

    14 |

    15 | User-Agent: VSMCrawler
    16 | Disallow: / 17 |

    18 | {% endblock %} 19 | 20 | 21 | -------------------------------------------------------------------------------- /web/views/root/index.html: -------------------------------------------------------------------------------- 1 | {% extends '../layouts/usual-layout.html' %} 2 | 3 | {% set title = 'Create a visual map of your site' %} 4 | {% set doNotShowTopSearch = true %} 5 | 6 | {% block scripts %} 7 | 13 | {% endblock %} 14 | 15 | {% block content %} 16 |
    17 |
    18 | 19 | 20 | 21 |
    22 | 23 |
    24 |

    Samples:

    25 | 31 | 37 |
    38 | 39 | 45 |
    46 | {% endblock %} 47 | -------------------------------------------------------------------------------- /worker/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var config = require('../config'); 4 | var SiteRepository = require('../data-access/site-repository'); 5 | var MappingClient = require('../worker/mapping-client'); 6 | var MappingEngine = require('../worker/mapping-engine'); 7 | var log = require('../infrastructure/logger')('worker'); 8 | 9 | var siteRepository = new SiteRepository(); 10 | 11 | log.info('Worker started'); 12 | 13 | // Worker 14 | var mappingEngine, mappingEnginePromise; 15 | mappingEngine = new MappingEngine(siteRepository); 16 | mappingEnginePromise = mappingEngine.start(); 17 | 18 | function onExit() { 19 | if (mappingEngine && mappingEnginePromise) { 20 | mappingEngine.stop(); 21 | 22 | mappingEnginePromise 23 | .then(() => siteRepository.close()) 24 | .then(() => process.exit(0)); 25 | } 26 | } 27 | 28 | process.on('SIGINT', onExit); -------------------------------------------------------------------------------- /worker/mapping-client.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var siteModels = require('../models/site-models'); 4 | var Promise = require('bluebird'); 5 | 6 | function MappingClient(siteRepository, config) { 7 | var _siteRepository = siteRepository; 8 | var _refreshPeriodInDays = (config && config.refreshPeriodInDays) || 7; 9 | 10 | this.getSite = function (domain, includeContents, contentsTimeStamp) { 11 | domain = siteModels.SiteInfo.normalizeDomain(domain); 12 | if (!siteModels.SiteInfo.isValidDomain(domain)) { 13 | return Promise.reject('Invalid domain'); 14 | } 15 | 16 | return _siteRepository.getSite(domain, includeContents, contentsTimeStamp) 17 | .then(site => { 18 | var needToProcess = site == null; 19 | 20 | // Need to process as info is too old 21 | var msPerDay = 1000 * 
/**
 * Gives the web application access to site maps and queues sites for
 * (re)processing when needed.
 *
 * @param {Object} siteRepository - repository providing getSite, saveSite and
 *     queueSiteForProcessing, each returning a promise.
 * @param {Object} [config] - optional settings.
 * @param {number} [config.refreshPeriodInDays=7] - age (in days) after which
 *     stored site information is considered too old and the site is queued
 *     for processing again.
 */
function MappingClient(siteRepository, config) {
    var _siteRepository = siteRepository;
    var _refreshPeriodInDays = (config && config.refreshPeriodInDays) || 7;

    var msPerMinute = 1000 * 60;
    var msPerHour = msPerMinute * 60;
    var msPerDay = msPerHour * 24;

    // Decides whether the site has to be (re)queued for processing
    function needsProcessing(site) {
        // Unknown site
        if (site == null) {
            return true;
        }

        // Refreshing is switched off for this site
        if (!site.info.refreshEnabled) {
            return false;
        }

        var status = site.info.status;
        var age = Date.now() - site.info.statusTime.getTime();

        // Info is too old. The configured period is used here; it used to be
        // hard-coded to 7 days, which silently ignored the configuration.
        if (age > _refreshPeriodInDays * msPerDay) {
            return true;
        }

        // There was a connection or robots.txt error
        var hadTransientProblem = (status == siteModels.SiteStatus.ConnectionProblem) ||
            (status == siteModels.SiteStatus.RobotsTxtProblem);
        if (hadTransientProblem && (age > 10 * msPerMinute)) {
            return true;
        }

        // Processing seems to be interrupted
        var isInProgress = (status == siteModels.SiteStatus.Added) ||
            (status == siteModels.SiteStatus.Processing);
        return isInProgress && (age > 1 * msPerHour);
    }

    /**
     * Returns a promise for the site with the given domain.
     *
     * When the site is unknown, outdated, or its processing failed or stalled,
     * a new site record is saved and queued for processing, and that new
     * record is the result. The promise is fulfilled with null when saving or
     * queueing fails, and rejected with 'Invalid domain' for invalid domains.
     *
     * @param {string} domain - domain name; normalized before use.
     * @param {boolean} includeContents - passed through to the repository.
     * @param {*} contentsTimeStamp - passed through to the repository.
     */
    this.getSite = function (domain, includeContents, contentsTimeStamp) {
        domain = siteModels.SiteInfo.normalizeDomain(domain);
        if (!siteModels.SiteInfo.isValidDomain(domain)) {
            return Promise.reject('Invalid domain');
        }

        return _siteRepository.getSite(domain, includeContents, contentsTimeStamp)
            .then(site => {
                if (!needsProcessing(site)) {
                    return site;
                }

                var newSite = new siteModels.Site();
                newSite.info.domain = domain;

                return _siteRepository.saveSite(newSite)
                    .then(() => _siteRepository.queueSiteForProcessing(domain))
                    .then(() => newSite)
                    .catch(() => null);
            });
    };
}
this.stop = function () { 18 | _stopRequested = true; 19 | 20 | var siteCount = _siteCrawlers.size; 21 | 22 | for (let crawler of _siteCrawlers.values()) { 23 | crawler.cancelProcessing(); 24 | } 25 | 26 | return Promise.all(Array.from(_sitePromises.values())) 27 | .then(() => log.info('Mapping engine: stopped processing all sites (%d)', siteCount)); 28 | }; 29 | 30 | this.start = function () { 31 | log.info('Mapping engine: started'); 32 | 33 | return processTasks(); 34 | }; 35 | 36 | function processTasks() { 37 | if (!_stopRequested) { 38 | if (_siteCrawlers.length >= _maxCapacity) { 39 | return Promise.race(getTaskPromises()) 40 | .then(processTasks) 41 | .catch(processTasks); 42 | } 43 | else { 44 | return _siteRepository.getNextSiteForProcessing() 45 | .then(domain => { 46 | if (!domain) { 47 | return Promise.delay(1000) 48 | .then(processTasks); 49 | } 50 | 51 | if (_siteCrawlers.has(domain)) { 52 | return processTasks(); 53 | } 54 | 55 | var crawler = new SiteCrawler(domain, _siteRepository); 56 | var promise = crawler.crawl() 57 | .finally(() => { 58 | _siteCrawlers.delete(domain); 59 | _sitePromises.delete(domain); 60 | }); 61 | 62 | _siteCrawlers.set(domain, crawler); 63 | _sitePromises.set(domain, promise); 64 | 65 | return processTasks(); 66 | }) 67 | .catch(() => Promise.delay(1000).then(processTasks)); 68 | } 69 | } 70 | } 71 | 72 | processTasks(); 73 | } 74 | 75 | module.exports = MappingEngine; -------------------------------------------------------------------------------- /worker/site-crawler/site-crawler.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | module.exports = SiteCrawler; 4 | 5 | var $ = require('cheerio'); 6 | var _ = require('lodash'); 7 | var Promise = require('bluebird'); 8 | var requestretry = require('requestretry'); 9 | var urlModule = require('url'); 10 | var robots = require('robots'); 11 | var iconv = require('iconv-lite'); 12 | var charset = require('charset'); 13 
/**
 * Crawls the domain from its root page and stores the result in the repository.
 *
 * Resets all per-crawl state, saves the site in the `Processing` state, fetches
 * robots.txt (failures of both steps are ignored) and then processes pages
 * until `addAndProcessPages()` reports completion.
 *
 * @returns {Promise} resolved with the saved site, or with `null` when the
 *     crawl was cancelled (the site is removed from the repository in that
 *     case); rejected with an Error when the final save fails.
 */
this.crawl = function () {
    _site = new siteModels.Site();
    _site.info.domain = _domain;
    _site.info.status = siteModels.SiteStatus.Processing;
    _site.contents = new siteModels.SiteContents();
    _pageCache = {};
    _processingProblemDescription = null;
    _processedPages = 0;
    _lastProcessedPages = 0;
    _lastSavedPages = 0;
    _cannotProcessRoot = false;
    _connectionProblem = false;
    _robotsTxtProblem = false;
    _pageTasks = [];

    log.info('Crawler (%s): Starting processing domain', domain);

    // Removes the partially processed site after a cancellation.
    // Always resolves with null, whether or not the removal succeeded.
    function removeCancelledSite() {
        return _siteRepository.removeSite(_domain)
            .then(() => {
                log.info('Crawler (%s): Cancellation requested, deleted site', _domain);
                return null;
            })
            .catch(() => {
                log.info('Crawler (%s): Cancellation requested, delete site failed', _domain);
                return null;
            });
    }

    // Maps the problems collected during the crawl to the final site status.
    function getFinalStatus() {
        if (_connectionProblem) {
            return siteModels.SiteStatus.ConnectionProblem;
        }

        if (_robotsTxtProblem) {
            return siteModels.SiteStatus.RobotsTxtProblem;
        }

        // A recorded problem description means the site was processed with problems
        return _processingProblemDescription ?
            siteModels.SiteStatus.ProcessedWithProblems : siteModels.SiteStatus.Processed;
    }

    return _crawlingPromise = saveSite()
        .catch(() => {})    // the initial save is best effort
        .then(() => retrieveRobotsTxt())
        .catch(() => {})    // without robots.txt the crawl goes on unrestricted
        .then(() => {
            if (_cancellationRequested) {
                return removeCancelledSite();
            }

            var rootPage = addPage("http://" + _domain + "/");
            rootPage.distanceFromRoot = 0;

            return addAndProcessPages()
                .then(() => {
                    if (_cancellationRequested) {
                        return removeCancelledSite();
                    }

                    _site.info.status = getFinalStatus();
                    _site.info.statusDescription = _processingProblemDescription;

                    return saveSite()
                        .then(savedSite => {
                            log.info('Crawler (%s): finished processing domain, saved %d pages and %d links',
                                _domain, savedSite.info.pageCount, savedSite.info.linkCount);

                            return savedSite;
                        })
                        .catch(error => {
                            log.info('Crawler (%s): finished processing domain, save failed',
                                _domain);

                            // Reject with a real Error and keep the original reason
                            var failure = new Error('Saving site failed');
                            failure.cause = error;
                            throw failure;
                        });
                });
        });
};
/**
 * Downloads a single page, records its HTTP status and title and registers
 * the internal links found on it.
 *
 * The returned promise never rejects: every failure is recorded on the page
 * (and, for the root page, in the crawl-level problem flags) so that the
 * crawl loop can always continue.
 *
 * @param {Object} page - page in the `Processing` state; `page.id === 0` is the root.
 * @returns {Promise} resolved (with no value) when the page has been handled.
 */
function processSinglePage(page) {
    return new Promise(function (resolve) {
        if (_cancellationRequested) {
            return resolve();
        }

        if (page.status != siteModels.PageStatus.Processing) {
            return resolve();
        }

        _processedPages++;
        if (_processedPages > _desiredNumberOfPages * 2) {
            return resolve();
        }

        var isRoot = (page.id === 0);
        var url = urlModule.parse(page.url);

        // Records that the home page cannot be processed
        function markRootProblem(description) {
            _cannotProcessRoot = true;
            _processingProblemDescription = description;
        }

        // Checking if robots.txt allows spider to process the page.
        // The short robots.txt token is used here, not the full HTTP User-Agent
        // header, so that rules addressed to the crawler by name are matched.
        if (_robotsParser && !_robotsParser.canFetchSync(SiteCrawler.userAgentForRobotsTxt, url.path)) {
            page.status = siteModels.PageStatus.UnprocessedBecauseOfRobotsTxt;
            if (isRoot) {
                _robotsTxtProblem = true;
                markRootProblem("Cannot process the site because of the robots.txt settings");
            }
            return resolve();
        }

        // Retrieving page
        var requestOptions = {
            method: 'GET',
            url: page.url,
            timeout: 40 * 1000,
            followRedirect: false,
            encoding: null, // body will be binary instead of string
            headers: {
                'User-Agent': SiteCrawler.userAgent
            },
            // retry settings
            maxAttempts: 3,
            retryDelay: 5000,
            retryStrategy: requestretry.RetryStrategies.NetworkError
        };

        // Interprets the response and updates the page
        function handleResponse(error, response, body) {
            if (error || isErrorHttpStatusCode(response.statusCode)) {
                if (response && response.statusCode) {
                    page.httpStatus = response.statusCode;
                }

                page.status = siteModels.PageStatus.Error;

                if (isRoot) {
                    _connectionProblem = !!error;
                    markRootProblem('Cannot get the home page of this site');
                }

                return;
            }

            // Parsing page
            var links;
            var title = '';
            page.httpStatus = response.statusCode;

            if ([301, 302, 303, 307, 308].indexOf(page.httpStatus) !== -1) {
                // Redirects are not followed by the request; the target is
                // treated as the only link of the page
                links = new Set();

                var location = response.headers.location;
                if (location) {
                    var link = urlModule.resolve(page.url, location);

                    if (isRoot && isExternalLink(link)) {
                        markRootProblem("Home page of this site is redirected to another domain (" + link + ")");
                    }

                    if (link) {
                        links.add(link);
                    }
                }
            }
            else if (isSuccessHttpStatusCode(page.httpStatus)) {
                var contentType = (response.headers['content-type'] || '').toLowerCase();
                if (!contentType.startsWith('text/html') && !contentType.startsWith('application/xhtml+xml')) {
                    page.status = siteModels.PageStatus.Binary;

                    if (isRoot) {
                        markRootProblem("Cannot get the home page of this site");
                    }

                    return;
                }

                var linkResult = getLinksAndTitleFromHtmlDocument(body, response.headers, page.url);
                links = linkResult.links;
                title = linkResult.title;
            }
            else if (isRoot) {
                markRootProblem("Cannot get the home page of this site");
            }

            // Adding links
            if (links && (links.size > 0)) {
                var linkHash = new Set();
                links.forEach(link => {
                    if (!isExternalLink(link) && !hasBinaryExtension(link)) {
                        linkHash.add(link);
                    }
                });

                processAddedLinks(page, linkHash);
            }

            page.status = siteModels.PageStatus.Processed;
            page.title = title;
        }

        requestretry(requestOptions, function (error, response, body) {
            try {
                handleResponse(error, response, body);
            }
            catch (ex) {
                // An exception here would otherwise leave the promise pending
                // forever and stall the whole crawl
                log.info('Crawler (%s): failed to process page %s (%s)', _domain, page.url, ex);
                page.status = siteModels.PageStatus.Error;

                if (isRoot) {
                    markRootProblem('Cannot get the home page of this site');
                }
            }

            resolve();
        });
    });
}
/**
 * Tells whether the path of the URL ends with one of the extensions listed in
 * `_binaryExtensions` (compared case-insensitively).
 *
 * @param {string} url - URL to check.
 * @returns {boolean|string} truthy when the extension is in the list; a path
 *     ending with a dot yields the empty string, which callers treat as false.
 */
function hasBinaryExtension(url) {
    var pathname = (urlModule.parse(url) || {}).pathname;
    if (!pathname) {
        return false;
    }

    // A dot at position 0 (or no dot at all) does not start an extension
    var dotIndex = pathname.lastIndexOf('.');
    if (dotIndex <= 0) {
        return false;
    }

    var extension = pathname.slice(dotIndex + 1);

    // The list is space separated and padded with spaces on both sides,
    // so a whole-word match is a plain substring search
    return extension && _binaryExtensions.includes(' ' + extension.toLowerCase() + ' ');
}
/**
 * Builds a snapshot of the current crawl state and stores it in the repository.
 *
 * The snapshot is a new Site object: info fields are copied from `_site`,
 * `statusTime` is set to the current time. Only pages in the `Processed` or
 * `Error` state are included, and pages that differ from another page only by
 * the "www." host variant are merged into one (see normalizeUrlForDomain).
 *
 * Side effect: updates `_lastSavedPages` with the number of pages in the
 * snapshot (not when the root page cannot be processed).
 *
 * @returns {Promise} resolved with the snapshot once the repository has saved it.
 */
function saveSite() {
    var site = new siteModels.Site();
    site.info.domain = _site.info.domain;
    // Preliminary progress based on the number of requested pages; it is
    // replaced below by a value based on the number of saved pages
    site.info.progress = (_site.info.status == siteModels.SiteStatus.Processing) ?
        Math.min(99, Math.floor(_processedPages * 100 / (_desiredNumberOfPages * 2))) : 100;
    site.info.status = _site.info.status;
    site.info.statusDescription = _site.info.statusDescription;
    site.info.statusTime = new Date();

    // The home page is not available: save the site info only, without contents
    if (_cannotProcessRoot)
    {
        return _siteRepository.saveSite(site)
            .then(() => site);
    }

    // Creating contents
    site.contents = new siteModels.SiteContents();

    var idsToPages = new Map();
    var urlsToPages = new Map();

    // Adding pages to dictionaries
    // (clones are used so that the crawl state in _site is left untouched)
    site.contents.pages = [];
    _site.contents.pages.forEach(page => {
        if ((page.status == siteModels.PageStatus.Processed) || (page.status == siteModels.PageStatus.Error)) {
            var pageToAdd = page.clone();
            idsToPages.set(pageToAdd.id, pageToAdd);
            urlsToPages.set(pageToAdd.url, pageToAdd);
        }
    });

    // Creating link hash
    // Links are kept as 'sourceId-targetId' strings; the Set removes duplicates
    // that appear after both ends are remapped to the canonical host variant
    var linkHash = new Set();
    _site.contents.pages.forEach(page => {
        page.linksTo.forEach(endPageId => {
            // Links to pages that are not part of the snapshot are dropped
            if (idsToPages.has(endPageId))
            {
                var id1 = page.id, id2 = endPageId;

                // Source page: use the page with the normalized URL if it was saved too
                var st = normalizeUrlForDomain(page.url);
                if ((st != page.url) && urlsToPages.has(st)) {
                    id1 = urlsToPages.get(st).id;
                }

                // Target page: the same remapping
                var page2 = idsToPages.get(id2);
                st = normalizeUrlForDomain(page2.url);
                if ((st != page2.url) && urlsToPages.has(st)) {
                    id2 = urlsToPages.get(st).id;
                }

                // Links of a page to itself are not stored
                if (id1 != id2) {
                    linkHash.add(id1 + '-' + id2);
                }
            }
        });
    });

    // Adding page from this domain
    // (every page whose URL is left unchanged by normalizeUrlForDomain)
    for (let page of idsToPages.values()) {
        var st = normalizeUrlForDomain(page.url);
        if (st == page.url) {
            site.contents.pages.push(page);
        }
    }

    // Adding non-duplicate pages from the domain with/without www
    for (let page of idsToPages.values()) {
        var st = normalizeUrlForDomain(page.url);
        if (st != page.url) {
            if (!urlsToPages.has(st)) {
                // No page with the normalized URL: keep this one under that URL
                page.url = st;
                site.contents.pages.push(page);
            }
            else
            {
                // Duplicate of a saved page: only its title is reused if the
                // saved page has none
                var existingPage = urlsToPages.get(st);
                if (!existingPage.title && page.title) {
                    existingPage.title = page.title;
                }
            }
        }
    }

    // Adding links
    // NOTE(review): the ids are appended to the linksTo list of the clone;
    // presumably Page.clone() does not copy linksTo - confirm in site-models.js.
    // NOTE(review): assumes every page that has outgoing links was cloned
    // above, otherwise idsToPages.get(id1) is undefined - confirm.
    linkHash.forEach(pair => {
        var i = pair.indexOf('-');
        var id1 = Number(pair.substring(0, i));
        var id2 = Number(pair.substring(i + 1));

        var page = idsToPages.get(id1);
        page.linksTo.push(id2);
    });

    // Numeric sort of the link targets of every saved page
    for (let page of site.contents.pages) {
        page.linksTo.sort((a, b) => a - b);
    }

    site.info.pageCount = site.contents.pages.length;
    site.info.linkCount = linkHash.size;

    // Read by addPagesForProcessing() to decide when enough pages were collected
    _lastSavedPages = site.info.pageCount;

    // Final progress: share of the desired number of pages, capped at 99
    // while the site is still being processed
    site.info.progress = (_site.info.status == siteModels.SiteStatus.Processing) ?
        Math.min(99, Math.floor(site.contents.pages.length * 100 / _desiredNumberOfPages)) : 100;

    return _siteRepository.saveSite(site)
        .then(() => site);
}
'userAgentForRobotsTxt', { 676 | value: 'VSMCrawler' 677 | }); -------------------------------------------------------------------------------- /worker/site-crawler/test/crawler-test.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var SiteCrawler = require('../site-crawler.js'); 4 | 5 | //var siteDomain = process.argv[2]; 6 | var siteDomain = 'www.alentum.com'; 7 | 8 | var SiteRepository = require('../../../data-access/site-repository'); 9 | var siteRepository = new SiteRepository(); 10 | 11 | var siteCrawler = new SiteCrawler(siteDomain, siteRepository); 12 | siteCrawler.crawl() 13 | .then(site => console.log(JSON.stringify(site, null, 4))) 14 | .catch(err => console.log('Cannot process site: %s', err)) 15 | .finally(() => siteRepository.close()); -------------------------------------------------------------------------------- /worker/site-crawler/test/promise-test.js: -------------------------------------------------------------------------------- 1 | var Promise = require('bluebird'); 2 | 3 | function test1() { 4 | "use strict"; 5 | return Promise.reject(1); 6 | } 7 | 8 | function test() { 9 | "use strict"; 10 | return test1().then(value => console.log('then: ' + value)) 11 | .finally(function() { 12 | console.log('finally: ' + arguments) 13 | }); 14 | } 15 | 16 | test().then(value => console.log('then: ' + value)) 17 | .catch(value => console.log('catch: ' + value)); -------------------------------------------------------------------------------- /worker/utils/objectExtensions.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | defineAutoProperty: function(o, key, initialValue) { 3 | Object.defineProperty(o, 'startPageId', { 4 | value: initialValue, 5 | writable: true, 6 | enumerable: true 7 | }); 8 | } 9 | }; --------------------------------------------------------------------------------