├── OSMHighwayMetrics.js ├── OSMQualityMetrics.js ├── README.md ├── UserStats.js ├── example-output ├── OSMQualityMetrics │ ├── metrostats.csv │ ├── relations.csv │ ├── tiger.csv │ └── userstats.csv └── UserStats │ └── userstats.csv ├── extras ├── qgis │ ├── styled-ways-example.png │ └── ways-age-version.qml └── sql │ ├── loaduserstats.sh │ └── userstats_load.sql ├── scripts ├── generate_stats.py └── process-highwaystats-results.py └── utils └── UserStats.py /OSMHighwayMetrics.js: -------------------------------------------------------------------------------- 1 | /* 2 | * OSMHighwayMetrics.js 3 | * ==================== 4 | * This is an OSMJS script that generates highway stats for US planets 5 | * 6 | * Setup 7 | * ===== 8 | * You need the osmium framework for it to work, get osmium at 9 | * https://github.com/joto/osmium and follow the install instructions 10 | * given there. With Osmium set up, build OSMJS following the 11 | * instructions at https://github.com/joto/osmium/tree/master/osmjs 12 | * 13 | * Running the script 14 | * ================== 15 | * With OSMJS compiled, run the script: 16 | * /path/to/osmjs -j OSMHighwayMetrics.js /path/to/data.osm [prefix] 17 | * 18 | * The script will generate two output files: 19 | * - [prefix_]highwaystats.csv : Highway statistics. 20 | * - [prefix_]userstats.csv : users involved with the highways. 21 | * 22 | * Notes 23 | * ===== 24 | * - The script will save its output files in the current working 25 | * directory. 
26 | * 27 | * License 28 | * ======= 29 | * Copyright (c) 2011 Martijn van Exel 30 | * 31 | * Permission is hereby granted, free of charge, to any person obtaining 32 | * a copy of this software and associated documentation files (the 33 | * "Software"), to deal in the Software without restriction, including 34 | * without limitation the rights to use, copy, modify, merge, publish, 35 | * distribute, sublicense, and/or sell copies of the Software, and to 36 | * permit persons to whom the Software is furnished to do so, subject 37 | * to the following conditions: 38 | * 39 | * The above copyright notice and this permission notice shall be 40 | * included in all copies or substantial portions of the Software. 41 | * 42 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 43 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 44 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 45 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 46 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 47 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 48 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 49 | * SOFTWARE. 50 | */ 51 | 52 | /* 53 | * CONFIGURATION 54 | */ 55 | 56 | // Here you can optionally set a directory for the output. If not set, 57 | // the output will be written to your current working directory. 58 | // Should end with a trailing slash. 
59 | var OUT_DIR = ''; 60 | 61 | // Known bots -- these will not be taken into account for the userstats 62 | var known_bots = ['woodpeck_fixbot', 'balrog-kun', 'nhd-import', 'TIGERcnl', 'DaveHansenTiger']; 63 | 64 | // Highway values that are part of the navigable road network 65 | var navigablehighwayvalues = ['motorway','motorway_link','trunk','trunk_link','primary','primary_link','secondary','secondary_link','tertiary','residential','unclassified','living_street','road','service']; 66 | 67 | /* 68 | * You should only modify the code below this line if you're familiar 69 | * with JavaScript and OSMJS 70 | * ===================================================================== 71 | */ 72 | 73 | // GLOBALS 74 | var users = []; 75 | var ages = []; 76 | var doingnodes = false, doingways = false, doingrelations = false; 77 | var nodes = 0, ways = 0,relations = 0; 78 | var ways = 0, highways = 0, navigablehighways = 0, tigerways = 0; 79 | var tigeruntouchedways = 0, tigerversionincrease = 0; 80 | var t0, t1, tnodes0, tnodes1, tways1, trelations1; 81 | 82 | // Get the output file prefix from the command line argument. 83 | outprefix = argv.length == 1 ? argv[0] + "_" : ""; 84 | print(outprefix); 85 | 86 | function User(uid,name) { 87 | this.uid=uid; 88 | this.name=name; 89 | this.ways=0; 90 | this.highways = 0; 91 | this.navigablehighways = 0; 92 | this.tigerways = 0; 93 | } 94 | 95 | function sort_by_tigerways(a,b) { 96 | return (a.tigerways < b.tigerways) ? 1 : (a.tigerways > b.tigerways) ? -1 : 0; 97 | } 98 | 99 | Osmium.Callbacks.init = function() { 100 | print('Running...'); 101 | t0 = new Date(); 102 | } 103 | 104 | Osmium.Callbacks.node = function() { 105 | if (!doingnodes) { 106 | // The before_* callbacks are not called, so we need a workaround. 
107 | doingnodes = true; 108 | tnodes0 = new Date(); 109 | print('parsing nodes...'); 110 | } 111 | nodes+=1; 112 | } 113 | 114 | Osmium.Callbacks.way = function() { 115 | if (doingnodes) { 116 | // The before_* callbacks are not called, so we need a workaround. 117 | doingnodes = false; 118 | doingways = true; 119 | tnodes1 = new Date(); 120 | print('parsing ways...'); 121 | } 122 | 123 | var highway = 0; 124 | var tiger = 0; 125 | var navigable = 0; 126 | 127 | if(!users[this.uid]) { 128 | users[this.uid] = new User(this.uid,this.user); 129 | } 130 | 131 | users[this.uid].ways+=1; 132 | 133 | ways++; 134 | 135 | for(var key in this.tags) { 136 | highway += (key == 'highway') ? 1:0; 137 | tiger += (key.match(/tiger/ig)) ? 1:0; 138 | navigable += (key == 'highway' && navigablehighwayvalues.indexOf(this.tags[key]) > -1) ? 1:0; 139 | } 140 | 141 | if(highway>0) { 142 | users[this.uid].highways++; 143 | highways++; 144 | } 145 | 146 | if(tiger>0) { 147 | tigerways++; 148 | if(this.version==1) tigeruntouchedways++; 149 | else users[this.uid].tigerways += 1; 150 | tigerversionincrease = tigerversionincrease + (this.version - 1 - tigerversionincrease) / tigerways; 151 | } 152 | 153 | if (navigable>0) { 154 | navigablehighways++; 155 | users[this.uid].navigablehighways++; 156 | } 157 | 158 | } 159 | 160 | Osmium.Callbacks.relation = function() { 161 | if (doingways) { 162 | // The before_* callbacks are not called, so we need a workaround. 
163 | doingways = false; 164 | doingrelations = true; 165 | tways1 = new Date(); 166 | print('parsing relations...'); 167 | } 168 | relations+=1; 169 | } 170 | 171 | Osmium.Callbacks.end = function() { 172 | print('output and cleanup...'); 173 | 174 | // CLEAN UP 175 | trelations1 = new Date(); 176 | users.sort(sort_by_tigerways); 177 | 178 | var outuserstats = Osmium.Output.CSV.open(OUT_DIR + outprefix + 'userstats.csv'); 179 | outuserstats.print('#\tuid\tusername\tways\thighways\ttigerways\tnavigablehighways\tprecentile'); 180 | var cumulativetiger = 0; 181 | var grandtotal = nodes + ways + relations; 182 | var realusercnt = 0; 183 | 184 | var botnodes = 0; 185 | var botways = 0; 186 | var botrelations = 0; 187 | 188 | for (var i=0;i -1) { 192 | botnodes += users[i].nodes; 193 | botways += users[i].ways; 194 | botrelations += users[i].relations; 195 | continue; 196 | }; 197 | }; 198 | // SECOND PASS 199 | for (var i=0; i -1) continue; 202 | cumulativetiger += users[i].tigerways; 203 | // outuserstats.print(users[i].uid, users[i].name, users[i].nodes, users[i].ways, users[i].relations, cumfeatures / (grandtotal - botnodes - botways - botrelations)); 204 | outuserstats.print(i+1, users[i].uid, users[i].name, users[i].ways, users[i].highways, users[i].tigerways, users[i].navigablehighways, cumulativetiger / (tigerways - tigeruntouchedways)); 205 | } 206 | outuserstats.close(); 207 | 208 | // WRITE BASE STATS 209 | var outhighways = Osmium.Output.CSV.open(OUT_DIR + outprefix + 'highwaystats.csv'); 210 | 211 | outhighways.print('total nodes',nodes) 212 | outhighways.print('total ways',ways) 213 | outhighways.print('total relations',relations) 214 | outhighways.print('total users involved in ways',realusercnt) 215 | outhighways.print('amt highways',highways); 216 | outhighways.print('pct highways', highways/ways); 217 | outhighways.print('amt navigable highways',navigablehighways); 218 | outhighways.print('pct navigable highways', navigablehighways/highways); 219 | 
outhighways.print('amt tiger ways',tigerways); 220 | outhighways.print('pct tiger ways',tigerways/ways); 221 | outhighways.print('amt untouched tiger',tigeruntouchedways); 222 | outhighways.print('pct untouched tiger',tigeruntouchedways / tigerways); 223 | outhighways.print('avg increase over TIGER',tigerversionincrease); 224 | 225 | outhighways.close(); 226 | 227 | // OUTPUT TIMINGS 228 | t1 = new Date(); 229 | var tnodes=tnodes1-tnodes0;tways=tways1-tnodes1;trelations=trelations1-tways1; 230 | print('finished!\nTimings:\ntotal: ' + (t1-t0) + ' ms\n---------------------\nnodes: ' + tnodes + 'ms\nways: ' + tways + 'ms\nrelations: ' + trelations + 'ms\noverhead: ' + ((t1-t0)-(tnodes+tways+trelations)) + 'ms'); 231 | } 232 | -------------------------------------------------------------------------------- /OSMQualityMetrics.js: -------------------------------------------------------------------------------- 1 | /* 2 | * OSMQualityMetrics.js 3 | * ==================== 4 | * This is an OSMJS script that generates general statistics as well as 5 | * quality metrics for any OSM file you throw at it. 6 | * 7 | * Setup 8 | * ===== 9 | * You need the osmium framework for it to work, get osmium at 10 | * https://github.com/joto/osmium and follow the install instructions 11 | * given there. With Osmium set up, build OSMJS following the 12 | * instructions at https://github.com/joto/osmium/tree/master/osmjs 13 | * 14 | * Running the script 15 | * ================== 16 | * With OSMJS compiled, run the script: 17 | * /path/to/osmjs -j OSMQualityMetrics.js -l array /path/to/data.osm 18 | * 19 | * The script will generate a number of output files: 20 | * - ways.* : A shapefile containing all way geometries with version and 21 | * timestamp attributes. 22 | * - metrostats.csv : The data metrics. 23 | * - userstats.csv : User statistics. 24 | * 25 | * Notes 26 | * ===== 27 | * - if you don't need the ways shapefile, you can set the OUTPUT_WAYS 28 | * variable to false in the script. 
You can also leave out the -l 29 | * parameter when running the script and speed things up. 30 | * - The -l array parameter is best for large OSM files. If you're working 31 | * with smaller, city-sized OSM data files, run OSMJS with the -h option 32 | * for more info. 33 | * - The script will save its output files in the current working 34 | * directory. 35 | * 36 | * License 37 | * ======= 38 | * Copyright (c) 2011 Martijn van Exel 39 | * 40 | * Permission is hereby granted, free of charge, to any person obtaining 41 | * a copy of this software and associated documentation files (the 42 | * "Software"), to deal in the Software without restriction, including 43 | * without limitation the rights to use, copy, modify, merge, publish, 44 | * distribute, sublicense, and/or sell copies of the Software, and to 45 | * permit persons to whom the Software is furnished to do so, subject 46 | * to the following conditions: 47 | * 48 | * The above copyright notice and this permission notice shall be 49 | * included in all copies or substantial portions of the Software. 50 | * 51 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 52 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 53 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 54 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 55 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 56 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 57 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 58 | * SOFTWARE. 59 | 60 | */ 61 | 62 | /* 63 | * CONFIGURATION 64 | */ 65 | 66 | // This controls the output of way geometries. If you set this to true, 67 | // don't forget you will need to set the -l parameter when running. 68 | var OUTPUT_WAYS = true; 69 | 70 | // Here you can optionally set a directory for the output. If not set, 71 | // the output will be written to your current working directory. 
72 | // Should end with a trailing slash. 73 | var OUT_DIR = ''; 74 | 75 | // These are the thresholds for the age distribution 76 | // Defaults are 30 days, 90 days, 180 days, 365 days, 730 days 77 | var day = 60*60*24; // Don't touch this. It's just a convenience var. 78 | var thresholds = [30*day, 90*day, 180*day, 365*day, 2*365*day]; 79 | 80 | // These are the keys that are considered when counting rich nodes 81 | var poikeys = {leisure:1,amenity:1,office:1,shop:1,craft:1,tourism:1,historic:1}; 82 | var transportkeys = {highway:1,barrier:1,cycleway:1,tracktype:1,waterway:1,railway:1,aeroway:1,aerialway:1,public_transport:1,power:1} 83 | var namekeys = {name:1,ref:1,place:1,addr:1} 84 | 85 | // This is the reference date from which the age statistics are 86 | // calculated. The date should coincide with the timestamp of the 87 | // OSM file you are analyzing. 88 | var REF_DATE = new Date("October 19, 2011 00:00:00"); 89 | 90 | /* 91 | * You should only modify the code below this line if you're familiar 92 | * with JavaScript and OSMJS 93 | * ===================================================================== 94 | */ 95 | 96 | // GLOBALS 97 | var shp; 98 | var users = []; 99 | var nodes = {}; 100 | var ages = []; 101 | var tigerbreakdown = {}; 102 | var relation_types = {}; 103 | var doingnodes = false, doingways = false, doingrelations = false; 104 | var nodecnt = 0, poicnt = 0, transportcnt = 0, namecnt = 0, waycnt = 0,relationcnt = 0, usercnt = 0; 105 | var nodetags = 0, waytags = 0, relationtags = 0; 106 | var ranking = {nodes:1,ways:3,relations:9}; 107 | var avgnodeversion = 0, avgwayversion = 0, avgrelationversion = 0; 108 | var tigerways = 0; var tiger_untouched=0;var tigerversionincrease = 0; 109 | var t0, t1, tnodes0, tnodes1, tways1, trelations1; 110 | 111 | function User(uid,name) { 112 | this.uid=uid; 113 | this.name=name; 114 | this.nodes=0; 115 | this.ways=0; 116 | this.relations=0; 117 | } 118 | 119 | User.prototype.rank = function(){return 
this.nodes*ranking.nodes + this.ways*ranking.ways+ this.relations*ranking.relations}; 120 | 121 | function calculate_percentiles(ary) { 122 | var cohorts = [0,0,0,0,0,0]; 123 | var now = Math.round(REF_DATE.getTime()/1000); 124 | for(var i=0;i=0;j-=1) { 128 | if(t<(now-thresholds[j])) { 129 | cohorts[j+1]+=1; 130 | cohorted = true; 131 | break; 132 | } 133 | } 134 | if(!cohorted) cohorts[0]+=1; 135 | } 136 | for(var j=cohorts.length-1;j>=0;j-=1) { 137 | cohorts[j] = ((cohorts[j] / ary.length) * 100).toFixed(1); 138 | } 139 | return cohorts; 140 | } 141 | 142 | function sort_by_rank(a,b) { 143 | return ((a.rank() < b.rank()) ? 1 : (a.rank() > b.rank()) ? -1 : 0); 144 | } 145 | 146 | function sort_by_totals(a,b) { 147 | return ((a.nodes + a.ways + a.relations) < (b.nodes + b.ways + b.relations) ? 1 : (a.nodes + a.ways + a.relations) > (b.nodes + b.ways + b.relations) ? -1 : 0); 148 | } 149 | 150 | 151 | Osmium.Callbacks.init = function() { 152 | print('Running...'); 153 | t0 = new Date(); 154 | if(OUTPUT_WAYS) { 155 | shp = Osmium.Output.Shapefile.open(OUT_DIR + 'ways', 'line'); 156 | shp.add_field('id', 'integer', 10); 157 | shp.add_field('name', 'string', 40); 158 | shp.add_field('version','integer',5); 159 | shp.add_field('timestamp','integer', 16); 160 | } 161 | } 162 | 163 | Osmium.Callbacks.node = function() { 164 | if (!doingnodes) { 165 | // The before_* callbacks are not called, so we need a workaround. 
166 | doingnodes = true; 167 | tnodes0 = new Date(); 168 | print('parsing nodes...'); 169 | } 170 | 171 | if(!users[this.uid]) { 172 | users[this.uid] = new User(this.uid,this.user); 173 | usercnt += 1; 174 | } 175 | users[this.uid].nodes+=1; 176 | for(var key in this.tags) { 177 | nodetags+=1; 178 | if (key in poikeys) poicnt += 1; 179 | if (key in transportkeys) transportcnt += 1; 180 | if (key in namekeys) namecnt += 1; 181 | } 182 | nodecnt+=1; 183 | nodes[this.id] = 0; 184 | ages.push(Math.round(new Date(this.timestamp).getTime()/1000)); 185 | avgnodeversion = avgnodeversion + (this.version - avgnodeversion) / nodecnt; 186 | } 187 | 188 | Osmium.Callbacks.way = function() { 189 | /* todo: 190 | * average length of road segments 191 | * shapes for certain tiger features 192 | * turn restrictions 193 | */ 194 | 195 | if(OUTPUT_WAYS) { 196 | shp.add(this.geom, { id: this.id, name: this.tags.name, version: this.version, timestamp: Math.round(new Date(this.timestamp).getTime()/1000) }); 197 | } 198 | 199 | if (doingnodes) { 200 | // The before_* callbacks are not called, so we need a workaround. 201 | doingnodes = false; 202 | doingways = true; 203 | tnodes1 = new Date(); 204 | print('parsing ways...'); 205 | } 206 | var tiger = false; 207 | if(!users[this.uid]) { 208 | users[this.uid] = new User(this.uid,this.user); 209 | usercnt += 1; 210 | } 211 | users[this.uid].ways+=1; 212 | waycnt+=1; 213 | ages.push(Math.round(new Date(this.timestamp).getTime()/1000)); 214 | for (var i=0; i < this.nodes.length; i++) { 215 | nodes[this.nodes[i]] = 1; 216 | } 217 | for(var key in this.tags) { 218 | waytags+=1; 219 | tiger=(key.match(/tiger/ig)) 220 | if(key.match(/tiger:cfcc/ig)) { 221 | tigerbreakdown[this.tags[key]] = isNaN(tigerbreakdown[this.tags[key]]) ? 
1 : tigerbreakdown[this.tags[key]] + 1; 222 | } 223 | } 224 | if(tiger) { 225 | tigerways++; 226 | if(this.version==1) tiger_untouched++; 227 | tigerversionincrease = tigerversionincrease + (this.version - 1 - tigerversionincrease) / tigerways; 228 | } 229 | avgwayversion = avgwayversion + (this.version - avgwayversion) / waycnt; 230 | } 231 | 232 | Osmium.Callbacks.relation = function() { 233 | if (doingways) { 234 | // The before_* callbacks are not called, so we need a workaround. 235 | doingways = false; 236 | doingrelations = true; 237 | tways1 = new Date(); 238 | print('parsing relations...'); 239 | } 240 | 241 | if(!users[this.uid]) { 242 | users[this.uid] = new User(this.uid,this.user); 243 | usercnt += 1; 244 | } 245 | users[this.uid].relations+=1; 246 | relationcnt+=1; 247 | ages.push(Math.round(new Date(this.timestamp).getTime()/1000)); 248 | for(var key in this.tags) { 249 | relationtags+=1; 250 | if (key.match(/type/i)) { 251 | relation_types[this.tags[key]] = isNaN(relation_types[this.tags[key]]) ? 
1 : relation_types[this.tags[key]] + 1; 252 | } 253 | } 254 | avgrelationversion = avgrelationversion + (this.version - avgrelationversion) / relationcnt; 255 | } 256 | 257 | Osmium.Callbacks.end = function() { 258 | print('output and cleanup...'); 259 | 260 | // CLEAN UP 261 | trelations1 = new Date(); 262 | users.sort(sort_by_totals); 263 | if(OUTPUT_WAYS) shp.close(); 264 | 265 | var out = Osmium.Output.CSV.open(OUT_DIR + 'userstats.csv'); 266 | out.print('uid\tusername\tnodes\tways\trelations\tpercentile'); 267 | var cumfeatures = 0; 268 | var grandtotal = nodecnt + waycnt + relationcnt; 269 | var realusercnt = 0; 270 | var user_thresholds = [0.9,0.95, 0.99]; 271 | var user_threshold_met = 0; 272 | var users_for_threshold = []; 273 | var userperc_for_threshold = []; 274 | 275 | // WRITE USER STATS TO FILE 276 | for (var i=0;i user_thresholds[user_threshold_met]) { 282 | users_for_threshold.push(i+1); 283 | user_threshold_met +=1; 284 | } 285 | } 286 | 287 | for(var i=0;i 20 | * MIT license 21 | * 22 | * Includes enhancements by Scott Trenda 23 | * and Kris Kowal 24 | * 25 | * Accepts a date, a mask, or a date and a mask. 26 | * Returns a formatted version of the given date. 27 | * The date defaults to the current date/time. 28 | * The mask defaults to dateFormat.masks.default. 
29 | */ 30 | 31 | var dateFormat = function () { 32 | var token = /d{1,4}|m{1,4}|yy(?:yy)?|([HhMsTt])\1?|[LloSZ]|"[^"]*"|'[^']*'/g, 33 | timezone = /\b(?:[PMCEA][SDP]T|(?:Pacific|Mountain|Central|Eastern|Atlantic) (?:Standard|Daylight|Prevailing) Time|(?:GMT|UTC)(?:[-+]\d{4})?)\b/g, 34 | timezoneClip = /[^-+\dA-Z]/g, 35 | pad = function (val, len) { 36 | val = String(val); 37 | len = len || 2; 38 | while (val.length < len) val = "0" + val; 39 | return val; 40 | }; 41 | 42 | // Regexes and supporting functions are cached through closure 43 | return function (date, mask, utc) { 44 | var dF = dateFormat; 45 | 46 | // You can't provide utc if you skip other args (use the "UTC:" mask prefix) 47 | if (arguments.length == 1 && Object.prototype.toString.call(date) == "[object String]" && !/\d/.test(date)) { 48 | mask = date; 49 | date = undefined; 50 | } 51 | 52 | // Passing date through Date applies Date.parse, if necessary 53 | date = date ? new Date(date) : new Date; 54 | if (isNaN(date)) throw SyntaxError("invalid date"); 55 | 56 | mask = String(dF.masks[mask] || mask || dF.masks["default"]); 57 | 58 | // Allow setting the utc argument via the mask 59 | if (mask.slice(0, 4) == "UTC:") { 60 | mask = mask.slice(4); 61 | utc = true; 62 | } 63 | 64 | var _ = utc ? "getUTC" : "get", 65 | d = date[_ + "Date"](), 66 | D = date[_ + "Day"](), 67 | m = date[_ + "Month"](), 68 | y = date[_ + "FullYear"](), 69 | H = date[_ + "Hours"](), 70 | M = date[_ + "Minutes"](), 71 | s = date[_ + "Seconds"](), 72 | L = date[_ + "Milliseconds"](), 73 | o = utc ? 
0 : date.getTimezoneOffset(), 74 | flags = { 75 | d: d, 76 | dd: pad(d), 77 | ddd: dF.i18n.dayNames[D], 78 | dddd: dF.i18n.dayNames[D + 7], 79 | m: m + 1, 80 | mm: pad(m + 1), 81 | mmm: dF.i18n.monthNames[m], 82 | mmmm: dF.i18n.monthNames[m + 12], 83 | yy: String(y).slice(2), 84 | yyyy: y, 85 | h: H % 12 || 12, 86 | hh: pad(H % 12 || 12), 87 | H: H, 88 | HH: pad(H), 89 | M: M, 90 | MM: pad(M), 91 | s: s, 92 | ss: pad(s), 93 | l: pad(L, 3), 94 | L: pad(L > 99 ? Math.round(L / 10) : L), 95 | t: H < 12 ? "a" : "p", 96 | tt: H < 12 ? "am" : "pm", 97 | T: H < 12 ? "A" : "P", 98 | TT: H < 12 ? "AM" : "PM", 99 | Z: utc ? "UTC" : (String(date).match(timezone) || [""]).pop().replace(timezoneClip, ""), 100 | o: (o > 0 ? "-" : "+") + pad(Math.floor(Math.abs(o) / 60) * 100 + Math.abs(o) % 60, 4), 101 | S: ["th", "st", "nd", "rd"][d % 10 > 3 ? 0 : (d % 100 - d % 10 != 10) * d % 10] 102 | }; 103 | 104 | return mask.replace(token, function ($0) { 105 | return $0 in flags ? flags[$0] : $0.slice(1, $0.length - 1); 106 | }); 107 | }; 108 | }(); 109 | 110 | // Some common format strings 111 | dateFormat.masks = { 112 | "default": "ddd mmm dd yyyy HH:MM:ss", 113 | shortDate: "m/d/yy", 114 | mediumDate: "mmm d, yyyy", 115 | longDate: "mmmm d, yyyy", 116 | fullDate: "dddd, mmmm d, yyyy", 117 | shortTime: "h:MM TT", 118 | mediumTime: "h:MM:ss TT", 119 | longTime: "h:MM:ss TT Z", 120 | isoDate: "yyyy-mm-dd", 121 | isoTime: "HH:MM:ss", 122 | isoDateTime: "yyyy-mm-dd'T'HH:MM:ss", 123 | isoUtcDateTime: "UTC:yyyy-mm-dd'T'HH:MM:ss'Z'" 124 | }; 125 | 126 | 127 | // Internationalization strings 128 | dateFormat.i18n = { 129 | dayNames: [ 130 | "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", 131 | "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday" 132 | ], 133 | monthNames: [ 134 | "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", 135 | "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", 
"November", "December" 136 | ] 137 | }; 138 | 139 | // For convenience... 140 | Date.prototype.format = function (mask, utc) { 141 | return dateFormat(this, mask, utc); 142 | }; 143 | 144 | 145 | /* 146 | * ==================== 147 | * End date format code 148 | * ==================== 149 | */ 150 | 151 | function User(uid,name) 152 | { 153 | this.uid=uid; 154 | this.name=name; 155 | this.nodes=0; 156 | this.nodescreated = 0; 157 | this.currentnodes = 0; 158 | this.ways=0; 159 | this.wayscreated = 0; 160 | this.currentways = 0; 161 | this.relations=0; 162 | this.relationscreated = 0; 163 | this.currentrelations = 0; 164 | this.firstObj = new Date(); 165 | this.lastObj = new Date(1970,1,1); 166 | } 167 | 168 | function cloneFeature(n) 169 | { 170 | var copy = {}; 171 | copy.id = n.id; 172 | copy.version = n.version 173 | copy.timestamp = n.timestamp; 174 | copy.uid = n.uid; 175 | copy.user = n.user; 176 | copy.changeset = n.changeset; 177 | return copy; 178 | } 179 | 180 | function sort_by_totals(a,b) 181 | { 182 | return ((a.nodes + a.ways + a.relations) < (b.nodes + b.ways + b.relations) ? 1 : (a.nodes + a.ways + a.relations) > (b.nodes + b.ways + b.relations) ? -1 : 0); 183 | } 184 | 185 | function processlastfeature(cf) 186 | { 187 | // this relies on the file being sorted by id and version, is this the case? 188 | //if (doingways) { 189 | // print("way id/version: " + pf.id + "/" + pf.version); 190 | //} 191 | // seems to hold... 192 | 193 | iscurrent = (pf.id != cf.id); 194 | 195 | if(!users[pf.uid]) 196 | { 197 | users[pf.uid] = new User(pf.uid,pf.user); 198 | } 199 | 200 | var d1 = users[pf.uid].firstObj; 201 | var d2 = new Date(pf.timestamp) 202 | var d3 = users[pf.uid].lastObj; 203 | 204 | users[pf.uid].firstObj = (d1 < d2) ? d1 : d2; 205 | users[pf.uid].lastObj = (d3 > d2) ? 
d3 : d2; 206 | 207 | if (iscurrent) 208 | { 209 | // print('current!!'); 210 | if (doingnodes) 211 | { 212 | currentnodecnt++; 213 | users[pf.uid].currentnodes++; 214 | } 215 | else if (doingways) 216 | { 217 | currentwaycnt++; 218 | users[pf.uid].currentways++; 219 | } 220 | else 221 | { 222 | currentrelationcnt++; 223 | users[pf.uid].currentrelations++; 224 | } 225 | } 226 | 227 | if (doingnodes) 228 | { 229 | nodecnt++; 230 | users[pf.uid].nodes++; 231 | if (pf.version == 1) users[pf.uid].nodescreated++ 232 | if (nodecnt % interval == 0) print(nodecnt + '...'); 233 | } 234 | else if (doingways) 235 | { 236 | waycnt++; 237 | users[pf.uid].ways++; 238 | if (pf.version == 1) users[pf.uid].wayscreated++ 239 | if (waycnt % interval == 0) print(waycnt + '...'); 240 | } 241 | else 242 | { 243 | relationcnt++; 244 | users[pf.uid].relations++; 245 | if (pf.version == 1) users[pf.uid].relationscreated++ 246 | if (relationcnt % interval == 0) print(relationcnt + '...'); 247 | } 248 | } 249 | 250 | Osmium.Callbacks.init = function() 251 | { 252 | print('Running...'); 253 | t0 = new Date(); 254 | } 255 | 256 | Osmium.Callbacks.node = function() 257 | { 258 | if (!doingnodes) 259 | { 260 | // The before_* callbacks are not called, so we need a workaround. 261 | doingnodes = true; 262 | tnodes0 = new Date(); 263 | print('parsing nodes...'); 264 | } 265 | 266 | if (pf) 267 | { 268 | processlastfeature(this); 269 | } 270 | pf = cloneFeature(this); 271 | } 272 | 273 | Osmium.Callbacks.way = function() 274 | { 275 | if (doingnodes) 276 | { 277 | // The before_* callbacks are not called, so we need a workaround. 
278 | // process last node before doing ways 279 | processlastfeature(pf); 280 | delete pf; 281 | doingnodes = false; 282 | doingways = true; 283 | tnodes1 = new Date(); 284 | print('parsing ways...'); 285 | } 286 | 287 | if (pf) 288 | { 289 | processlastfeature(this); 290 | } 291 | pf = cloneFeature(this); 292 | } 293 | 294 | Osmium.Callbacks.relation = function() 295 | { 296 | if (doingways) 297 | { 298 | // The before_* callbacks are not called, so we need a workaround. 299 | processlastfeature(pf); 300 | delete pf; 301 | doingways = false; 302 | doingrelations = true; 303 | tways1 = new Date(); 304 | print('parsing relations...'); 305 | } 306 | 307 | if (pf) processlastfeature(this); 308 | pf = cloneFeature(this); 309 | } 310 | 311 | Osmium.Callbacks.end = function() 312 | { 313 | print('output and cleanup...'); 314 | 315 | trelations1 = new Date(); 316 | users.sort(sort_by_totals); 317 | var realusercnt = 0; 318 | 319 | // Open output file in OUT_DIR 320 | var out = Osmium.Output.CSV.open(OUT_DIR + '/userstats.csv'); 321 | 322 | // Print headers 323 | out.print('uid\tusername\tnodes\tnodes_created\tcur nodes\tways\tways_created\tcur ways\trelations\trelations_created\tcur rels\tfirst\tlast\tdays active\ttotal edits\tcurrent objects\tavg edits/day\tpersistence'); 324 | 325 | // Caluculate metrics for each user 326 | for (var i=0;i 2 | 3 | 255 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | name 112 | 113 | 114 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 
143 | 144 | -------------------------------------------------------------------------------- /extras/sql/loaduserstats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if test -z "$2" 4 | then 5 | echo "Usage: loaduserstats.sh dbname /path/to/userstats.csv" 6 | else 7 | cat "$2" | psql -d "$1" -U osm -c "$(cat userstats_load.sql)"; # arguments quoted so paths and database names containing spaces are passed intact 8 | fi 9 | -------------------------------------------------------------------------------- /extras/sql/userstats_load.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS userstats; 2 | CREATE TABLE userstats 3 | (uid integer PRIMARY KEY, 4 | username varchar, 5 | nodes integer, 6 | curnodes integer, 7 | ways integer, 8 | curways integer, 9 | relations integer, 10 | currels integer, 11 | firstedit date, 12 | lastedit date, 13 | daysactive smallint, 14 | totaledits integer, 15 | currentobjects integer, 16 | avgeditsperday real, 17 | persistence real); 18 | 19 | COPY userstats FROM stdin WITH (FORMAT 'csv', HEADER, DELIMITER ' '); 20 | -------------------------------------------------------------------------------- /scripts/generate_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import glob 5 | from subprocess import call 6 | import commands 7 | import os 8 | import shutil 9 | 10 | osmjs_path = "" 11 | 12 | 13 | def usage(): 14 | print """ 15 | A script to generate user stats for a directory full of OSM PBF files - either full history or current PBF. 16 | 17 | Usage: generate_stats in_dir out_dir [path_to_osmium] 18 | 19 | in_dir contains one or more OSM files (.osm.pbf) 20 | out_dir is where the stats CSVs will be written 21 | 22 | Make sure that osmjs (part of osmium) is installed, osmjs is compiled, and in the current path (or pass path_to_osmium). 
23 | """ 24 | sys.exit(1) 25 | 26 | if __name__ == '__main__': 27 | if len(sys.argv) < 3: 28 | print "Too few arguments ({num}).".format(num=len(sys.argv) - 1) 29 | usage() 30 | osmjs_path = commands.getoutput("which osmjs") 31 | if osmjs_path == "": 32 | if len(sys.argv) == 4: 33 | osmjs_path = os.path.join(sys.argv[3], 'osmjs/osmjs') 34 | if not (os.path.isfile(osmjs_path) and os.access(osmjs_path, os.X_OK)): 35 | print "osmjs is not in not at {osmjs_path}".format(osmjs_path=osmjs_path) 36 | usage() 37 | else: 38 | print "osmjs not in PATH and no path_to_osmium given." 39 | usage() 40 | path = sys.argv[1] 41 | files = glob.glob(path + '*.osm.pbf') 42 | print 'will process {num} files'.format(num=len(files)) 43 | for osmfile in files: 44 | basename = os.path.splitext(os.path.basename(osmfile))[0] 45 | print "Processing {basename}".format(basename=basename) 46 | call([osmjs_path, '-j', '../UserStats.js', '-l' 'array', osmfile]) 47 | dest_csv = os.path.join(sys.argv[2], basename + '.csv') 48 | print "ouputting stats file at {statspath}".format(statspath=dest_csv) 49 | shutil.move('userstats.csv', dest_csv) 50 | -------------------------------------------------------------------------------- /scripts/process-highwaystats-results.py: -------------------------------------------------------------------------------- 1 | /osm/script/process-highwaystats-results.py -------------------------------------------------------------------------------- /utils/UserStats.py: -------------------------------------------------------------------------------- 1 | import os 2 | from imposm.parser import OSMParser 3 | from tcdb import tdb 4 | from datetime import datetime 5 | import copy 6 | 7 | CACHE_LOCATION = '/osm/tmp' 8 | 9 | class UserCache(object): 10 | def __init__(self): 11 | self.previousFeature = None 12 | self.currentFeature = None 13 | try: 14 | path = os.path.join(CACHE_LOCATION, 'usercache.tdb') 15 | if os.path.exists(path): os.remove(path) 16 | print path 17 | self.cache = 
class UserCache(object):
    """Per-user edit counters stored in a table database file.

    Records are keyed by the user id converted to a string. Each record is a
    dict of integer counters plus ``firstobject`` / ``lastobject``
    timestamps, which are set when the record is created and not touched
    again by this class.
    """

    def __init__(self):
        """Create a fresh ``usercache.tdb`` under CACHE_LOCATION and open it.

        An existing cache file is removed first. If the file cannot be
        created or opened, a hint is printed and the process exits with
        status 1.
        """
        self.previousFeature = None
        self.currentFeature = None
        try:
            path = os.path.join(CACHE_LOCATION, 'usercache.tdb')
            if os.path.exists(path):
                os.remove(path)
            print(path)
            self.cache = tdb.TDB()
            self.cache.open(path)
        except Exception as strerr:
            print('user cache file could not be created at %s, does the directory exist? If not, create it. If so, Check permissions and disk space.' % CACHE_LOCATION)
            print(strerr)
            # SystemExit instead of the site-provided exit() helper, which is
            # not guaranteed to exist in every interpreter configuration.
            raise SystemExit(1)

    def increment(self, uid, typ):
        """Add one to counter ``typ`` of user ``uid``, creating the record if needed.

        Both arguments are converted to strings before use. The record is
        read, modified and written back exactly once, so the counter grows
        by one regardless of whether the backing store hands out copies or
        live references.
        """
        uid = str(uid)
        typ = str(typ)
        try:
            record = self.cache[uid]
        except KeyError:
            print('creating record for {0}'.format(uid))
            created = datetime.now()
            record = {
                'coord': 0,
                'coordcreated': 0,
                'currentcoord': 0,
                'node': 0,
                'nodecreated': 0,
                'currentnode': 0,
                'way': 0,
                'waycreated': 0,
                'currentway': 0,
                'relation': 0,
                'relationcreated': 0,
                'currentrelation': 0,
                'firstobject': created,
                'lastobject': created,
            }
        record[typ] = record.get(typ, 0) + 1
        self.cache[uid] = record

    def result(self):
        """Print the number of records followed by every record; returns None."""
        print('cache is now %i records' % len(self.cache))
        for key in self.cache:
            print(key + ': ' + str(self.cache[key]))

    def userCount(self):
        """Return the number of user records currently in the cache."""
        return len(self.cache)

    def close(self):
        """Close the underlying database handle."""
        self.cache.close()


class OSMFeature(object):
    """Plain record holding the metadata of one OSM object version."""

    def __init__(self, id=0, version=0, timestamp=None, uid=0, ftype=None):
        """Store the given metadata.

        ``timestamp`` defaults to the time of construction. The previous
        ``datetime.now()`` default was evaluated only once, when the class
        was defined, so every default-constructed feature shared it.
        """
        if timestamp is None:
            timestamp = datetime.now()
        self.id = id
        self.version = version
        self.timestamp = timestamp
        self.uid = uid
        self.featuretype = ftype
class UserStats(object):
    """Counts parsed OSM features per user via the parser callbacks.

    Each callback records the incoming feature as ``currentFeature`` and
    credits the *previous* feature to its user. As a consequence the very
    last feature delivered by the parser is only counted if
    ``processLastFeature`` is invoked once more after parsing.
    NOTE(review): nothing visible here performs that final call - confirm
    whether the off-by-one is intended.
    """

    def __init__(self):
        """Open the user cache and start with two placeholder features (id 0)."""
        self.cache = UserCache()
        self.currentFeature = OSMFeature()
        self.previousFeature = OSMFeature()

    def processLastFeature(self):
        """Credit ``previousFeature`` to its user under its feature type.

        Prints the current and previous ids for tracing. Features whose type
        is not coord/node/way/relation (e.g. the initial placeholder) are
        not counted.
        """
        is_new_object = (self.currentFeature.id != self.previousFeature.id)
        print('%i === %i' % (self.currentFeature.id, self.previousFeature.id))
        if not is_new_object:
            print('current? ' + str(is_new_object))
        ftype = self.previousFeature.featuretype
        if ftype in ('coord', 'node', 'way', 'relation'):
            self.cache.increment(self.previousFeature.uid, ftype)

    def _advance(self, osmid, osmversion, osmtimestamp, osmuid, ftype):
        """Make the given feature current and count the one seen before it."""
        print('%s %i' % (ftype, osmid))
        self.currentFeature = OSMFeature(osmid, osmversion, osmtimestamp, osmuid, ftype)
        # The placeholder created in __init__ has id 0 and must not be counted.
        if self.previousFeature.id > 0:
            self.processLastFeature()
        self.previousFeature = copy.deepcopy(self.currentFeature)

    # Every callback receives an iterable of 6-tuples. Only the id and the
    # last three fields (version, timestamp, uid) are used; the second and
    # third fields are ignored.
    # NOTE(review): their meaning per feature type depends on the parser
    # build in use - verify against it.

    def coords_callback(self, coords):
        """Handle a batch of coords."""
        for osmid, _unused1, _unused2, osmversion, osmtimestamp, osmuid in coords:
            self._advance(osmid, osmversion, osmtimestamp, osmuid, 'coord')

    def nodes_callback(self, nodes):
        """Handle a batch of nodes."""
        for osmid, _unused1, _unused2, osmversion, osmtimestamp, osmuid in nodes:
            self._advance(osmid, osmversion, osmtimestamp, osmuid, 'node')

    def ways_callback(self, ways):
        """Handle a batch of ways."""
        for osmid, _unused1, _unused2, osmversion, osmtimestamp, osmuid in ways:
            self._advance(osmid, osmversion, osmtimestamp, osmuid, 'way')

    def relations_callback(self, relations):
        """Handle a batch of relations."""
        for osmid, _unused1, _unused2, osmversion, osmtimestamp, osmuid in relations:
            self._advance(osmid, osmversion, osmtimestamp, osmuid, 'relation')
# Instantiate the counter and the parser, then parse the extract.
u = UserStats()
p = OSMParser(
    concurrency=4,
    coords_callback=u.coords_callback,
    nodes_callback=u.nodes_callback,
    ways_callback=u.ways_callback,
    relations_callback=u.relations_callback,
)
print("parsing...")
try:
    p.parse('/osm/planet/utah.osh.pbf')
    # result() prints the records itself and returns nothing; wrapping it in
    # a print statement only emitted a stray "None" afterwards.
    u.cache.result()
finally:
    # Release the cache file even when parsing fails part-way through.
    u.cache.close()