├── scripts
├── process-highwaystats-results.py
└── generate_stats.py
├── extras
├── qgis
│ ├── styled-ways-example.png
│ └── ways-age-version.qml
└── sql
│ ├── loaduserstats.sh
│ └── userstats_load.sql
├── example-output
├── OSMQualityMetrics
│ ├── relations.csv
│ ├── tiger.csv
│ ├── metrostats.csv
│ └── userstats.csv
└── UserStats
│ └── userstats.csv
├── README.md
├── utils
└── UserStats.py
├── OSMHighwayMetrics.js
├── UserStats.js
└── OSMQualityMetrics.js
/scripts/process-highwaystats-results.py:
--------------------------------------------------------------------------------
1 | /osm/script/process-highwaystats-results.py
--------------------------------------------------------------------------------
/extras/qgis/styled-ways-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mvexel/OSMQualityMetrics/HEAD/extras/qgis/styled-ways-example.png
--------------------------------------------------------------------------------
/example-output/OSMQualityMetrics/relations.csv:
--------------------------------------------------------------------------------
1 | route 9
2 | multipolygon 169
3 | city 7
4 | building 2
5 | site 3
6 | Airport 1
7 | Military 1
8 | boundary 1
9 | restriction 69
10 | destination_sign 2
11 |
--------------------------------------------------------------------------------
/extras/sql/loaduserstats.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Load a userstats CSV file into the `userstats` table of a PostgreSQL database.
#
# Usage: loaduserstats.sh dbname /path/to/userstats.csv
#
# The table definition and COPY statement are read from userstats_load.sql
# in the current working directory; the CSV is fed to psql on stdin.

if test -z "$2"
then
    echo "Usage: loaduserstats.sh dbname /path/to/userstats.csv"
else
    # Quote both arguments so database names and paths containing spaces or
    # glob characters are passed through intact, and redirect the file
    # directly instead of spawning an extra `cat`.
    psql -d "$1" -U osm -c "$(cat userstats_load.sql)" < "$2"
fi
9 |
--------------------------------------------------------------------------------
/example-output/OSMQualityMetrics/tiger.csv:
--------------------------------------------------------------------------------
1 | A41 7507
2 | A71 23
3 | A63 67
4 | A74 24
5 | A73 48
6 | A72 13
7 | A65 4
8 | A31 43
9 | A64 1
10 | A45 1
11 | B21 35
12 | A51 1
13 | B11 15
14 | B13 3
15 | A44 1
16 | A31; A41 8
17 | A11 21
18 | A11; A63 1
19 | B12 1
20 |
--------------------------------------------------------------------------------
/extras/sql/userstats_load.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS userstats;
2 | CREATE TABLE userstats
3 | (uid integer PRIMARY KEY,
4 | username varchar,
5 | nodes integer,
6 | curnodes integer,
7 | ways integer,
8 | curways integer,
9 | relations integer,
10 | currels integer,
11 | firstedit date,
12 | lastedit date,
13 | daysactive smallint,
14 | totaledits integer,
15 | currentobjects integer,
16 | avgeditsperday real,
17 | persistence real);
18 |
19 | COPY userstats FROM stdin WITH (FORMAT 'csv', HEADER, DELIMITER ' ');
20 |
--------------------------------------------------------------------------------
/example-output/OSMQualityMetrics/metrostats.csv:
--------------------------------------------------------------------------------
1 | total nodes 131073
2 | total ways 21148
3 | total relations 255
4 | total users 287
5 | avg tags per node 0.20859368443539097
6 | avg tags per way 6.222716096084736
7 | avg tags per relation 4.482352941176471
8 | pct nodes not in way 0.023010078353284048
9 | avg node version 2.2867486057387847
10 | avg way version 2.9573009268016053
11 | avg relation version 10.662745098039212
12 | contribution thresholds 0.9,0.95,0.99
13 | users 18,29,73
14 | percentage 0.0627177700348432,0.10104529616724739,0.25435540069686413
15 | data temperature 57.24615812019907
16 | amt non-tiger ways 7845
17 | pct non-tiger ways 0.37095706449782484
18 | amt untouched tiger 384
19 | pct untouched tiger 0.048948374760994263
20 | avg increase over TIGER 3.620650095602294
21 | poi nodes 2355
22 | transport nodes 3966
23 | named cnt 2269
24 | age cohort thresholds 2592000,7776000,15552000,31536000,63072000
25 | age cohorts 0.0,1.0,4.2,20.9,38.7,35.2
26 |
--------------------------------------------------------------------------------
/scripts/generate_stats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import sys
4 | import glob
5 | from subprocess import call
6 | import commands
7 | import os
8 | import shutil
9 |
10 | osmjs_path = ""
11 |
12 |
def usage():
    """Print the command-line help text and exit with status 1.

    Called when arguments are missing or osmjs cannot be located. This
    function never returns: it always ends in ``sys.exit(1)``.
    """
    # A parenthesized, single-argument print produces the same output as the
    # Python 2 print statement and is also valid as the Python 3 function.
    print("""
A script to generate user stats for a directory full of OSM PBF files - either full history or current PBF.

Usage: generate_stats in_dir out_dir [path_to_osmium]

in_dir contains one or more OSM files (.osm.pbf)
out_dir is where the stats CSVs will be written

Make sure that osmjs (part of osmium) is installed, osmjs is compiled, and in the current path (or pass path_to_osmium).
""")
    sys.exit(1)
25 |
if __name__ == '__main__':
    # Script entry point: locate osmjs, then run UserStats.js over every
    # *.osm.pbf file in in_dir and move each resulting userstats.csv into
    # out_dir under the input file's base name.
    #
    # Every print below is a parenthesized single-argument call so the text
    # is identical under the Python 2 print statement and the Python 3
    # print function.
    if len(sys.argv) < 3:
        print("Too few arguments ({num}).".format(num=len(sys.argv) - 1))
        usage()

    # Prefer an osmjs that is on PATH. getoutput() also captures whatever
    # `which` writes when nothing is found, so only accept the result when it
    # really names an executable file.
    osmjs_path = commands.getoutput("which osmjs").strip()
    if not (os.path.isfile(osmjs_path) and os.access(osmjs_path, os.X_OK)):
        osmjs_path = ""

    if osmjs_path == "":
        # Fall back to the optional path_to_osmium argument.
        if len(sys.argv) >= 4:
            osmjs_path = os.path.join(sys.argv[3], 'osmjs/osmjs')
            if not (os.path.isfile(osmjs_path) and os.access(osmjs_path, os.X_OK)):
                print("osmjs is not at {osmjs_path} or is not executable".format(osmjs_path=osmjs_path))
                usage()
        else:
            print("osmjs not in PATH and no path_to_osmium given.")
            usage()

    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    # os.path.join makes the pattern correct whether or not in_dir was given
    # with a trailing slash (plain concatenation matched nothing without it).
    files = glob.glob(os.path.join(in_dir, '*.osm.pbf'))
    print('will process {num} files'.format(num=len(files)))
    for osmfile in files:
        basename = os.path.splitext(os.path.basename(osmfile))[0]
        print("Processing {basename}".format(basename=basename))
        # '-l' and 'array' must be separate list items; without the comma
        # Python silently concatenated them into the single token '-larray'.
        status = call([osmjs_path, '-j', '../UserStats.js', '-l', 'array', osmfile])
        if status != 0 or not os.path.isfile('userstats.csv'):
            # Do not move a missing or stale userstats.csv left over from an
            # earlier file when osmjs failed on this one.
            print("osmjs failed for {basename} (exit status {status}), skipping".format(
                basename=basename, status=status))
            continue
        dest_csv = os.path.join(out_dir, basename + '.csv')
        print("outputting stats file at {statspath}".format(statspath=dest_csv))
        shutil.move('userstats.csv', dest_csv)
50 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Read This First
2 |
3 | 5/2018: The [osm-editing-metrics](https://github.com/mvexel/osm-editing-metrics) is the spiritual successor to this repo. It uses the new osmium and its nodejs bindings for 🚀 performance.
4 |
5 | 12/2017: This project is a few years old and I haven't given it any attention. There are probably better tools out there to do quality analysis on OSM data now, especially at scale. Have a look at Mapbox's QA Tiles and some of the work done with them, for example [by Jennings Anderson](https://mapbox.github.io/osm-analysis-collab/).
6 |
7 | If you do find this project useful and are familiar with Osmium and its Javascript bindings, let me know if you want to take it over or become a maintainer!
8 |
9 | Quality Metrics Suite
10 | =====================
11 | This is a growing set of OSMJS scripts that allow you to calculate quality
12 | metrics on OSM data. Currently it consists of:
13 | - OSMQualityMetrics.js - various quality metrics, for normal OSM files
14 | - UserStats.js - historical user metrics, for full history OSM files
15 |
16 | Setup
17 | =====
18 | You need the osmium framework for it to work, get osmium at
19 | https://github.com/joto/osmium and follow the install instructions
20 | given there. With Osmium set up, build OSMJS following the
21 | instructions at https://github.com/joto/osmium/tree/master/osmjs
22 |
23 | Running the script
24 | ==================
25 | With OSMJS compiled, run one of the scripts:
26 |
27 | /path/to/osmjs -j OSMQualityMetrics.js -l array /path/to/data.osm
28 |
29 |
30 | (It works equally well - perhaps even better - with a PBF input file, if you have PBF support in osmium)
31 |
32 | The output on screen will look something like this:
33 |
34 | Running...
35 | parsing nodes...
36 | parsing ways...
37 | parsing relations...
38 | output and cleanup...
39 | total nodes / ways / relations: 51777 / 5040 / 28
40 | finished!
41 | Timings:
42 | total: 5002 ms
43 | ---------------------
44 | nodes: 4270ms
45 | ways: 617ms
46 | relations: 3ms
47 | overhead: 112ms
48 |
49 | The scripts will generate output in the OUT_DIR specified in the script file.
50 |
51 | For OSMQualityMetrics.js the output will be:
52 | - `ways.*` : A shapefile containing all way geometries with `version` and
53 | `timestamp` attributes.
54 | - `metrostats.csv` : The data metrics.
55 | - `tiger.csv` : Breakdown of TIGER CFCC classes
56 | - `relations.csv` : Breakdown of relation types
57 | - `userstats.csv` : User breakdown
58 |
59 | For UserStats.js the output will be:
60 | - `userstats.csv` : Historical user breakdown
61 |
62 | Sample output files are included in the `example-output` directory
63 |
64 | Notes
65 | =====
66 | - if you don't need the ways shapefile, you can set the OUTPUT_WAYS
67 | variable to false in the script. You can then also leave out the -l
68 | parameter when running the script and speed things up.
69 | - The `-l array` option is best for large OSM files. If you're working
70 | with smaller, city-sized OSM data files, use `-l sparsetable`. Run `osmjs -h`
71 | for more info.
72 | - The scripts will save their output files in the current working
73 | directory if no OUT_DIR is specified.
74 |
75 | Timings
76 | =======
77 | OSMQualityMetrics.js: On an Intel® Core™ i5-2410M CPU @ 2.30GHzx4 machine with 8GB of RAM
78 | running Ubuntu Oneiric, a 55MB bz2-compressed OSM XML file takes 103
79 | seconds to process without way shapefile output. With way shapefile
80 | output using the sparsetable storage, the same file took 133 seconds to
81 | process.
82 |
83 | Extras
84 | ======
85 | The generated ways shapefiles will include version and timestamp attributes. You can use those to create interesting visualizations, like the ones shown in Martijn van Exel's [talk](http://www.slideshare.net/mvexel/insert-coin-to-play) at State Of The Map 2011. The Quantum GIS style file used to generate these images is included in the `qgis` folder.
86 | 
87 |
88 | What's Next
89 | ===========
90 | Easy
91 | ----
92 | * Add way length statistics
93 | * More attributes in ways output
94 | * More specific stylings
95 | * More specific statistics on relations
96 |
97 | Harder
98 | ------
99 | * More metrics on full history files
100 |
--------------------------------------------------------------------------------
/utils/UserStats.py:
--------------------------------------------------------------------------------
1 | import os
2 | from imposm.parser import OSMParser
3 | from tcdb import tdb
4 | from datetime import datetime
5 | import copy
6 |
7 | CACHE_LOCATION = '/osm/tmp'
8 |
class UserCache(object):
    """Per-user edit counters kept in an on-disk table at CACHE_LOCATION.

    Records are keyed by the user id converted to a string. Each record is a
    dict of counters plus ``firstobject`` / ``lastobject`` timestamps.
    """

    # Counter names every freshly created user record starts with (all zero).
    _COUNTERS = (
        'coord', 'coordcreated', 'currentcoord',
        'node', 'nodecreated', 'currentnode',
        'way', 'waycreated', 'currentway',
        'relation', 'relationcreated', 'currentrelation',
    )

    def __init__(self):
        """Create a fresh cache file, removing any existing one first.

        Exits the process with status 1 if the file cannot be created.
        """
        self.previousFeature = None
        self.currentFeature = None
        try:
            path = os.path.join(CACHE_LOCATION, 'usercache.tdb')
            if os.path.exists(path):
                os.remove(path)
            print(path)
            self.cache = tdb.TDB()
            self.cache.open(path)
        except Exception as strerr:
            print('user cache file could not be created at %s, does the directory exist? If not, create it. If so, Check permissions and disk space.' % CACHE_LOCATION)
            print(strerr)
            # Same effect as exit(1), without relying on the `site` builtin.
            raise SystemExit(1)

    def _new_record(self):
        """Return an all-zero record for a user seen for the first time."""
        record = dict.fromkeys(self._COUNTERS, 0)
        record['firstobject'] = datetime.now()
        record['lastobject'] = datetime.now()
        return record

    def increment(self, uid, typ):
        """Add one to counter ``typ`` of user ``uid``, creating the record if needed.

        Both arguments are converted to strings before use.
        """
        uid = str(uid)
        typ = str(typ)
        try:
            record = self.cache[uid]
        except KeyError:
            print('creating record for {0}'.format(uid))
            record = self._new_record()
        # Read-modify-write exactly once. The original additionally wrote
        # through self.cache[uid][typ], which counts the edit twice whenever
        # the store hands back the stored object itself.
        # NOTE(review): assumes the backing store round-trips int values -
        # confirm against the tcdb table API.
        record[typ] = record.get(typ, 0) + 1
        self.cache[uid] = record

    def result(self):
        """Print the record count followed by every record; returns None."""
        print('cache is now %i records' % len(self.cache))
        for key in self.cache:
            print(key + ': ' + str(self.cache[key]))

    def userCount(self):
        """Return the number of user records in the cache."""
        return len(self.cache)

    def close(self):
        """Close the underlying cache store."""
        self.cache.close()
64 |
class OSMFeature(object):
    """Plain record describing one version of an OSM feature."""

    def __init__(self, id = 0, version = 0, timestamp = None, uid = 0, ftype = None):
        """Store the given attributes.

        ``timestamp`` defaults to the time of the call. The original default
        ``datetime.now()`` was evaluated only once, when the class was
        defined, so every default-constructed feature shared that instant.
        ``ftype`` is stored as ``featuretype``.
        """
        self.id = id
        self.version = version
        self.timestamp = datetime.now() if timestamp is None else timestamp
        self.uid = uid
        self.featuretype = ftype
72 |
class UserStats(object):
    """Parser callbacks that count features per user, one feature behind.

    Each callback records the feature it just received as ``currentFeature``
    and counts the one received before it (``previousFeature``).

    NOTE(review): nothing here counts the very last feature delivered by the
    parser, because there is no following feature to trigger it.
    """

    _COUNTED_TYPES = ('coord', 'node', 'way', 'relation')

    def __init__(self):
        self.cache = UserCache()
        self.currentFeature = OSMFeature()
        self.previousFeature = OSMFeature()

    def processLastFeature(self):
        """Count ``previousFeature`` for its user under its feature type."""
        current = (self.currentFeature.id != self.previousFeature.id)
        print('%i === %i' % (self.currentFeature.id, self.previousFeature.id))
        if not current:
            print('current? ' + str(current))
        ftype = self.previousFeature.featuretype
        # A featuretype of None (the initial placeholder) is not counted.
        if ftype in self._COUNTED_TYPES:
            self.cache.increment(self.previousFeature.uid, ftype)

    def _track(self, osmid, osmversion, osmtimestamp, osmuid, ftype):
        """Shared body of the four callbacks for one incoming feature."""
        print('%s %i' % (ftype, osmid))
        self.currentFeature = OSMFeature(osmid, osmversion, osmtimestamp, osmuid, ftype)
        # Skip the placeholder feature (id 0) that exists before any input.
        if self.previousFeature.id > 0:
            self.processLastFeature()
        self.previousFeature = copy.deepcopy(self.currentFeature)

    # All four callbacks unpack six-element tuples whose last three items are
    # version, timestamp and uid.
    # NOTE(review): stock imposm.parser documents three-element tuples -
    # confirm the parser build in use supplies the extra fields.

    def coords_callback(self, coords):
        for osmid, _lon, _lat, osmversion, osmtimestamp, osmuid in coords:
            self._track(osmid, osmversion, osmtimestamp, osmuid, 'coord')

    def nodes_callback(self, nodes):
        # The original called a bare processLastFeature(...) here, which is a
        # NameError; the method lives on self and takes no argument.
        for osmid, _tags, _ref, osmversion, osmtimestamp, osmuid in nodes:
            self._track(osmid, osmversion, osmtimestamp, osmuid, 'node')

    def ways_callback(self, ways):
        for osmid, _second, _third, osmversion, osmtimestamp, osmuid in ways:
            self._track(osmid, osmversion, osmtimestamp, osmuid, 'way')

    def relations_callback(self, relations):
        for osmid, _second, _third, osmversion, osmtimestamp, osmuid in relations:
            self._track(osmid, osmversion, osmtimestamp, osmuid, 'relation')
124 |
125 |
# Instantiate the counter and the parser, parse the history file, then print
# the collected per-user records.
# NOTE(review): UserStats counts one feature behind and so depends on the
# callbacks arriving in file order - confirm that holds with concurrency=4.
u = UserStats()
p = OSMParser(
    concurrency=4,
    coords_callback=u.coords_callback,
    nodes_callback=u.nodes_callback,
    ways_callback=u.ways_callback,
    relations_callback=u.relations_callback,
)
print("parsing...")
try:
    p.parse('/osm/planet/utah.osh.pbf')
    # result() prints the records itself and returns None, so wrapping it in
    # print only added a stray "None" line.
    u.cache.result()
finally:
    # Close the cache even when parsing fails.
    u.cache.close()
139 |
--------------------------------------------------------------------------------
/extras/qgis/ways-age-version.qml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 255
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 | name
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
/OSMHighwayMetrics.js:
--------------------------------------------------------------------------------
1 | /*
2 | * OSMHighwayMetrics.js
3 | * ====================
4 | * This is an OSMJS script that generates highway stats for US planets
5 | *
6 | * Setup
7 | * =====
8 | * You need the osmium framework for it to work, get osmium at
9 | * https://github.com/joto/osmium and follow the install instructions
10 | * given there. With Osmium set up, build OSMJS following the
11 | * instructions at https://github.com/joto/osmium/tree/master/osmjs
12 | *
13 | * Running the script
14 | * ==================
15 | * With OSMJS compiled, run the script:
16 | * /path/to/osmjs -j OSMHighwayMetrics.js /path/to/data.osm [prefix]
17 | *
18 | * The script will generate two output files:
19 | * - [prefix_]highwaystats.csv : Highway statistics.
20 | * - [prefix_]userstats.csv : users involved with the highways.
21 | *
22 | * Notes
23 | * =====
24 | * - The script will save its output files in the current working
25 | * directory.
26 | *
27 | * License
28 | * =======
29 | * Copyright (c) 2011 Martijn van Exel
30 | *
31 | * Permission is hereby granted, free of charge, to any person obtaining
32 | * a copy of this software and associated documentation files (the
33 | * "Software"), to deal in the Software without restriction, including
34 | * without limitation the rights to use, copy, modify, merge, publish,
35 | * distribute, sublicense, and/or sell copies of the Software, and to
36 | * permit persons to whom the Software is furnished to do so, subject
37 | * to the following conditions:
38 | *
39 | * The above copyright notice and this permission notice shall be
40 | * included in all copies or substantial portions of the Software.
41 | *
42 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
43 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
45 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
46 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
47 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
48 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
49 | * SOFTWARE.
50 | */
51 |
52 | /*
53 | * CONFIGURATION
54 | */
55 |
56 | // Here you can optionally set a directory for the output. If not set,
57 | // the output will be written to your current working directory.
58 | // Should end with a trailing slash.
59 | var OUT_DIR = '';
60 |
61 | // Known bots -- these will not be taken into account for the userstats
62 | var known_bots = ['woodpeck_fixbot', 'balrog-kun', 'nhd-import', 'TIGERcnl', 'DaveHansenTiger'];
63 |
64 | // Highway values that are part of the navigable road network
65 | var navigablehighwayvalues = ['motorway','motorway_link','trunk','trunk_link','primary','primary_link','secondary','secondary_link','tertiary','residential','unclassified','living_street','road','service'];
66 |
67 | /*
68 | * You should only modify the code below this line if you're familiar
69 | * with JavaScript and OSMJS
70 | * =====================================================================
71 | */
72 |
// GLOBALS
var users = [];   // User objects, indexed by uid in the way callback
var ages = [];
var doingnodes = false, doingways = false, doingrelations = false;
var nodes = 0, ways = 0, relations = 0;
// `ways` is declared once above; the original declared it a second time here.
var highways = 0, navigablehighways = 0, tigerways = 0;
var tigeruntouchedways = 0, tigerversionincrease = 0;
var t0, t1, tnodes0, tnodes1, tways1, trelations1;

// Get the output file prefix from the first command line argument, if any.
// Declared with var (it was an implicit global), and the prefix is no longer
// dropped when more than one argument is passed.
var outprefix = argv.length >= 1 ? argv[0] + "_" : "";
print(outprefix);
85 |
/*
 * Per-user tally of the ways this script attributes to one OSM user.
 * uid  - OSM user id
 * name - OSM user name
 */
function User(uid,name) {
    this.uid = uid;
    this.name = name;
    this.ways = 0;
    this.highways = 0;
    this.navigablehighways = 0;
    this.tigerways = 0;
    // The end callback adds users[i].nodes and users[i].relations into its
    // bot totals; without these fields those sums become NaN.
    this.nodes = 0;
    this.relations = 0;
}
94 |
// Array.prototype.sort comparator: orders users by tigerways, largest first.
function sort_by_tigerways(a,b) {
    if (a.tigerways < b.tigerways) {
        return 1;
    }
    if (a.tigerways > b.tigerways) {
        return -1;
    }
    return 0;
}
98 |
99 | Osmium.Callbacks.init = function() {
100 | print('Running...');
101 | t0 = new Date();
102 | }
103 |
// Called once per node: counts it, and on the first call marks the start of
// the node phase.
Osmium.Callbacks.node = function() {
    if (!doingnodes) {
        // The before_* callbacks are not called, so the phase start is
        // detected on the first node instead.
        print('parsing nodes...');
        tnodes0 = new Date();
        doingnodes = true;
    }
    nodes++;
}
113 |
// Called once per way: updates the global and per-user way, highway,
// navigable-highway and TIGER tallies.
Osmium.Callbacks.way = function() {
    if (doingnodes) {
        // The before_* callbacks are not called, so the switch from nodes to
        // ways is detected on the first way instead.
        doingnodes = false;
        doingways = true;
        tnodes1 = new Date();
        print('parsing ways...');
    }

    var uid = this.uid;
    if (!users[uid]) {
        users[uid] = new User(uid, this.user);
    }
    var owner = users[uid];

    owner.ways += 1;
    ways++;

    // Classify the way from its tags.
    var isHighway = false;
    var isTiger = false;
    var isNavigable = false;
    for (var key in this.tags) {
        if (key == 'highway') {
            isHighway = true;
            if (navigablehighwayvalues.indexOf(this.tags[key]) > -1) {
                isNavigable = true;
            }
        }
        if (key.match(/tiger/ig)) {
            isTiger = true;
        }
    }

    if (isHighway) {
        owner.highways++;
        highways++;
    }

    if (isTiger) {
        tigerways++;
        if (this.version == 1) {
            tigeruntouchedways++;
        } else {
            owner.tigerways += 1;
        }
        // Running mean of (version - 1) over all TIGER ways seen so far.
        tigerversionincrease = tigerversionincrease + (this.version - 1 - tigerversionincrease) / tigerways;
    }

    if (isNavigable) {
        navigablehighways++;
        owner.navigablehighways++;
    }
}
159 |
// Called once per relation: counts it, and on the first call marks the end
// of the way phase.
Osmium.Callbacks.relation = function() {
    if (doingways) {
        // The before_* callbacks are not called, so the switch from ways to
        // relations is detected on the first relation instead.
        print('parsing relations...');
        tways1 = new Date();
        doingrelations = true;
        doingways = false;
    }
    relations++;
}
170 |
171 | Osmium.Callbacks.end = function() {
172 | print('output and cleanup...');
173 |
174 | // CLEAN UP
175 | trelations1 = new Date();
176 | users.sort(sort_by_tigerways);
177 |
178 | var outuserstats = Osmium.Output.CSV.open(OUT_DIR + outprefix + 'userstats.csv');
179 | outuserstats.print('#\tuid\tusername\tways\thighways\ttigerways\tnavigablehighways\tprecentile');
180 | var cumulativetiger = 0;
181 | var grandtotal = nodes + ways + relations;
182 | var realusercnt = 0;
183 |
184 | var botnodes = 0;
185 | var botways = 0;
186 | var botrelations = 0;
187 |
188 | for (var i=0;i -1) {
192 | botnodes += users[i].nodes;
193 | botways += users[i].ways;
194 | botrelations += users[i].relations;
195 | continue;
196 | };
197 | };
198 | // SECOND PASS
199 | for (var i=0; i -1) continue;
202 | cumulativetiger += users[i].tigerways;
203 | // outuserstats.print(users[i].uid, users[i].name, users[i].nodes, users[i].ways, users[i].relations, cumfeatures / (grandtotal - botnodes - botways - botrelations));
204 | outuserstats.print(i+1, users[i].uid, users[i].name, users[i].ways, users[i].highways, users[i].tigerways, users[i].navigablehighways, cumulativetiger / (tigerways - tigeruntouchedways));
205 | }
206 | outuserstats.close();
207 |
208 | // WRITE BASE STATS
209 | var outhighways = Osmium.Output.CSV.open(OUT_DIR + outprefix + 'highwaystats.csv');
210 |
211 | outhighways.print('total nodes',nodes)
212 | outhighways.print('total ways',ways)
213 | outhighways.print('total relations',relations)
214 | outhighways.print('total users involved in ways',realusercnt)
215 | outhighways.print('amt highways',highways);
216 | outhighways.print('pct highways', highways/ways);
217 | outhighways.print('amt navigable highways',navigablehighways);
218 | outhighways.print('pct navigable highways', navigablehighways/highways);
219 | outhighways.print('amt tiger ways',tigerways);
220 | outhighways.print('pct tiger ways',tigerways/ways);
221 | outhighways.print('amt untouched tiger',tigeruntouchedways);
222 | outhighways.print('pct untouched tiger',tigeruntouchedways / tigerways);
223 | outhighways.print('avg increase over TIGER',tigerversionincrease);
224 |
225 | outhighways.close();
226 |
227 | // OUTPUT TIMINGS
228 | t1 = new Date();
229 | var tnodes=tnodes1-tnodes0;tways=tways1-tnodes1;trelations=trelations1-tways1;
230 | print('finished!\nTimings:\ntotal: ' + (t1-t0) + ' ms\n---------------------\nnodes: ' + tnodes + 'ms\nways: ' + tways + 'ms\nrelations: ' + trelations + 'ms\noverhead: ' + ((t1-t0)-(tnodes+tways+trelations)) + 'ms');
231 | }
232 |
--------------------------------------------------------------------------------
/UserStats.js:
--------------------------------------------------------------------------------
1 | /*
2 | * UserStats.js
3 | * ====================
4 | */
5 |
6 | var OUT_DIR = '.';
7 |
8 | var day = 1000* 60*60*24;
9 | var doingnodes = false, doingways = false, doingrelations = false;
10 | var users = [];
11 | var pf, iscurrent;
12 | var nodecnt = 0, waycnt = 0, relationcnt = 0;
13 | var currentnodecnt = 0, currentwaycnt = 0, currentrelationcnt = 0;
14 | var interval = 1000000;
15 | var t0, t1, tnodes0, tnodes1, tways1, trelations1;
16 |
17 | /*
18 | * Date Format 1.2.3
19 | * (c) 2007-2009 Steven Levithan
20 | * MIT license
21 | *
22 | * Includes enhancements by Scott Trenda
23 | * and Kris Kowal
24 | *
25 | * Accepts a date, a mask, or a date and a mask.
26 | * Returns a formatted version of the given date.
27 | * The date defaults to the current date/time.
28 | * The mask defaults to dateFormat.masks.default.
29 | */
30 |
31 | var dateFormat = function () {
32 | var token = /d{1,4}|m{1,4}|yy(?:yy)?|([HhMsTt])\1?|[LloSZ]|"[^"]*"|'[^']*'/g,
33 | timezone = /\b(?:[PMCEA][SDP]T|(?:Pacific|Mountain|Central|Eastern|Atlantic) (?:Standard|Daylight|Prevailing) Time|(?:GMT|UTC)(?:[-+]\d{4})?)\b/g,
34 | timezoneClip = /[^-+\dA-Z]/g,
35 | pad = function (val, len) {
36 | val = String(val);
37 | len = len || 2;
38 | while (val.length < len) val = "0" + val;
39 | return val;
40 | };
41 |
42 | // Regexes and supporting functions are cached through closure
43 | return function (date, mask, utc) {
44 | var dF = dateFormat;
45 |
46 | // You can't provide utc if you skip other args (use the "UTC:" mask prefix)
47 | if (arguments.length == 1 && Object.prototype.toString.call(date) == "[object String]" && !/\d/.test(date)) {
48 | mask = date;
49 | date = undefined;
50 | }
51 |
52 | // Passing date through Date applies Date.parse, if necessary
53 | date = date ? new Date(date) : new Date;
54 | if (isNaN(date)) throw SyntaxError("invalid date");
55 |
56 | mask = String(dF.masks[mask] || mask || dF.masks["default"]);
57 |
58 | // Allow setting the utc argument via the mask
59 | if (mask.slice(0, 4) == "UTC:") {
60 | mask = mask.slice(4);
61 | utc = true;
62 | }
63 |
64 | var _ = utc ? "getUTC" : "get",
65 | d = date[_ + "Date"](),
66 | D = date[_ + "Day"](),
67 | m = date[_ + "Month"](),
68 | y = date[_ + "FullYear"](),
69 | H = date[_ + "Hours"](),
70 | M = date[_ + "Minutes"](),
71 | s = date[_ + "Seconds"](),
72 | L = date[_ + "Milliseconds"](),
73 | o = utc ? 0 : date.getTimezoneOffset(),
74 | flags = {
75 | d: d,
76 | dd: pad(d),
77 | ddd: dF.i18n.dayNames[D],
78 | dddd: dF.i18n.dayNames[D + 7],
79 | m: m + 1,
80 | mm: pad(m + 1),
81 | mmm: dF.i18n.monthNames[m],
82 | mmmm: dF.i18n.monthNames[m + 12],
83 | yy: String(y).slice(2),
84 | yyyy: y,
85 | h: H % 12 || 12,
86 | hh: pad(H % 12 || 12),
87 | H: H,
88 | HH: pad(H),
89 | M: M,
90 | MM: pad(M),
91 | s: s,
92 | ss: pad(s),
93 | l: pad(L, 3),
94 | L: pad(L > 99 ? Math.round(L / 10) : L),
95 | t: H < 12 ? "a" : "p",
96 | tt: H < 12 ? "am" : "pm",
97 | T: H < 12 ? "A" : "P",
98 | TT: H < 12 ? "AM" : "PM",
99 | Z: utc ? "UTC" : (String(date).match(timezone) || [""]).pop().replace(timezoneClip, ""),
100 | o: (o > 0 ? "-" : "+") + pad(Math.floor(Math.abs(o) / 60) * 100 + Math.abs(o) % 60, 4),
101 | S: ["th", "st", "nd", "rd"][d % 10 > 3 ? 0 : (d % 100 - d % 10 != 10) * d % 10]
102 | };
103 |
104 | return mask.replace(token, function ($0) {
105 | return $0 in flags ? flags[$0] : $0.slice(1, $0.length - 1);
106 | });
107 | };
108 | }();
109 |
110 | // Some common format strings
111 | dateFormat.masks = {
112 | "default": "ddd mmm dd yyyy HH:MM:ss",
113 | shortDate: "m/d/yy",
114 | mediumDate: "mmm d, yyyy",
115 | longDate: "mmmm d, yyyy",
116 | fullDate: "dddd, mmmm d, yyyy",
117 | shortTime: "h:MM TT",
118 | mediumTime: "h:MM:ss TT",
119 | longTime: "h:MM:ss TT Z",
120 | isoDate: "yyyy-mm-dd",
121 | isoTime: "HH:MM:ss",
122 | isoDateTime: "yyyy-mm-dd'T'HH:MM:ss",
123 | isoUtcDateTime: "UTC:yyyy-mm-dd'T'HH:MM:ss'Z'"
124 | };
125 |
126 |
127 | // Internationalization strings
128 | dateFormat.i18n = {
129 | dayNames: [
130 | "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
131 | "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
132 | ],
133 | monthNames: [
134 | "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
135 | "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"
136 | ]
137 | };
138 |
139 | // For convenience...
140 | Date.prototype.format = function (mask, utc) {
141 | return dateFormat(this, mask, utc);
142 | };
143 |
144 |
145 | /*
146 | * ====================
147 | * End date format code
148 | * ====================
149 | */
150 |
/*
 * Per-user tally of node, way and relation versions, plus the range of
 * timestamps seen for the user.
 */
function User(uid,name)
{
    this.uid = uid;
    this.name = name;

    // All counters start at zero; the order matches the original field order.
    var counters = [
        'nodes', 'nodescreated', 'currentnodes',
        'ways', 'wayscreated', 'currentways',
        'relations', 'relationscreated', 'currentrelations'
    ];
    for (var i = 0; i < counters.length; i++)
    {
        this[counters[i]] = 0;
    }

    // Starting points that processlastfeature narrows with min / max.
    this.firstObj = new Date();
    this.lastObj = new Date(1970,1,1);
}
167 |
// Returns a plain object holding the id, version, timestamp, uid, user and
// changeset of feature n.
function cloneFeature(n)
{
    return {
        id: n.id,
        version: n.version,
        timestamp: n.timestamp,
        uid: n.uid,
        user: n.user,
        changeset: n.changeset
    };
}
179 |
// Array.prototype.sort comparator: orders users by nodes + ways + relations,
// largest first.
function sort_by_totals(a,b)
{
    var totalA = a.nodes + a.ways + a.relations;
    var totalB = b.nodes + b.ways + b.relations;
    if (totalA < totalB)
    {
        return 1;
    }
    if (totalA > totalB)
    {
        return -1;
    }
    return 0;
}
184 |
/*
 * Account for the previously seen feature version (global pf).
 *
 * cf is the feature that follows pf in the input. It is used only to decide
 * whether pf is the current (latest) version of its object. This relies on
 * the file being sorted by id and version.
 */
function processlastfeature(cf)
{
    // pf is current when the next feature belongs to a different object.
    // Callers flushing the last feature of a phase pass pf itself (or nothing);
    // that feature has no later version, so it is current too. The original
    // compared pf.id with itself in that case and never counted it.
    iscurrent = (!cf || cf === pf || pf.id != cf.id);

    if (!users[pf.uid])
    {
        users[pf.uid] = new User(pf.uid, pf.user);
    }
    var user = users[pf.uid];

    // Widen the user's first/last timestamp range to include pf.
    var d1 = user.firstObj;
    var d2 = new Date(pf.timestamp);
    var d3 = user.lastObj;
    user.firstObj = (d1 < d2) ? d1 : d2;
    user.lastObj = (d3 > d2) ? d3 : d2;

    // The doing* flags say which phase pf belongs to; anything that is
    // neither a node nor a way is counted as a relation.
    if (doingnodes)
    {
        if (iscurrent)
        {
            currentnodecnt++;
            user.currentnodes++;
        }
        nodecnt++;
        user.nodes++;
        if (pf.version == 1) user.nodescreated++;
        if (nodecnt % interval == 0) print(nodecnt + '...');
    }
    else if (doingways)
    {
        if (iscurrent)
        {
            currentwaycnt++;
            user.currentways++;
        }
        waycnt++;
        user.ways++;
        if (pf.version == 1) user.wayscreated++;
        if (waycnt % interval == 0) print(waycnt + '...');
    }
    else
    {
        if (iscurrent)
        {
            currentrelationcnt++;
            user.currentrelations++;
        }
        relationcnt++;
        user.relations++;
        if (pf.version == 1) user.relationscreated++;
        if (relationcnt % interval == 0) print(relationcnt + '...');
    }
}
249 |
Osmium.Callbacks.init = function()
{
    // Start-up hook: print a banner, then store the start time in t0.
    print('Running...');
    t0 = new Date();
};
255 |
Osmium.Callbacks.node = function()
{
    // Node hook. Each feature is booked one step late: the previously
    // seen feature (pf) is processed once the next one is known, then the
    // incoming node becomes the new pf.

    // The before_* callbacks are not called, so the first node seen
    // doubles as the "nodes are starting" signal.
    if (!doingnodes)
    {
        doingnodes = true;
        tnodes0 = new Date();
        print('parsing nodes...');
    }

    if (pf) processlastfeature(this);

    pf = cloneFeature(this);
};
272 |
Osmium.Callbacks.way = function()
{
    // Way hook. Each feature is booked one step late: the previously seen
    // feature (pf) is processed once the next one is known, then the
    // incoming way becomes the new pf.

    if (doingnodes)
    {
        // The before_* callbacks are not called, so the first way seen
        // doubles as the "nodes are finished" signal.
        //
        // Flush the last node while doingnodes is still true so it is
        // booked as a node. Nothing follows it within the node section,
        // so it is the current version of its object; processlastfeature
        // decides that via pf.id != cf.id, hence the id-less sentinel.
        // (Passing pf itself would always compare equal, and passing this
        // way could collide, since its id may equal the node's id.)
        if (pf)
        {
            processlastfeature({ id: null });
        }
        // Reset by assignment: `delete` does not clear a declared
        // variable, which would let the node be booked a second time
        // below, as a way.
        // NOTE(review): assumes pf is a file-level variable - confirm.
        pf = null;
        doingnodes = false;
        doingways = true;
        tnodes1 = new Date();
        print('parsing ways...');
    }

    if (pf)
    {
        processlastfeature(this);
    }
    pf = cloneFeature(this);
};
293 |
Osmium.Callbacks.relation = function()
{
    // Relation hook. Each feature is booked one step late: the previously
    // seen feature (pf) is processed once the next one is known, then the
    // incoming relation becomes the new pf.

    if (doingways)
    {
        // The before_* callbacks are not called, so the first relation
        // seen doubles as the "ways are finished" signal.
        //
        // Flush the last way while doingways is still true so it is
        // booked as a way. Nothing follows it within the way section, so
        // it is the current version of its object; processlastfeature
        // decides that via pf.id != cf.id, hence the id-less sentinel.
        // (Passing pf itself would always compare equal, and passing this
        // relation could collide, since its id may equal the way's id.)
        if (pf)
        {
            processlastfeature({ id: null });
        }
        // Reset by assignment: `delete` does not clear a declared
        // variable, which would let the way be booked a second time
        // below, as a relation.
        // NOTE(review): assumes pf is a file-level variable - confirm.
        pf = null;
        doingways = false;
        doingrelations = true;
        tways1 = new Date();
        print('parsing relations...');
    }

    if (pf)
    {
        processlastfeature(this);
    }
    pf = cloneFeature(this);
};
310 |
311 | Osmium.Callbacks.end = function()
312 | {
313 | print('output and cleanup...');
314 |
315 | trelations1 = new Date();
316 | users.sort(sort_by_totals);
317 | var realusercnt = 0;
318 |
319 | // Open output file in OUT_DIR
320 | var out = Osmium.Output.CSV.open(OUT_DIR + '/userstats.csv');
321 |
322 | // Print headers
323 | out.print('uid\tusername\tnodes\tnodes_created\tcur nodes\tways\tways_created\tcur ways\trelations\trelations_created\tcur rels\tfirst\tlast\tdays active\ttotal edits\tcurrent objects\tavg edits/day\tpersistence');
324 |
325 | // Calculate metrics for each user
326 | for (var i=0;i=0;j-=1) {
128 | if(t<(now-thresholds[j])) {
129 | cohorts[j+1]+=1;
130 | cohorted = true;
131 | break;
132 | }
133 | }
134 | if(!cohorted) cohorts[0]+=1;
135 | }
136 | for(var j=cohorts.length-1;j>=0;j-=1) {
137 | cohorts[j] = ((cohorts[j] / ary.length) * 100).toFixed(1);
138 | }
139 | return cohorts;
140 | }
141 |
function sort_by_rank(a,b) {
    // Comparator for Array.prototype.sort: orders entries by the value
    // returned from their rank() method, highest rank first.
    if (a.rank() < b.rank()) return 1;
    if (a.rank() > b.rank()) return -1;
    return 0;
}
145 |
function sort_by_totals(a,b) {
    // Comparator for Array.prototype.sort: orders entries by their
    // combined nodes + ways + relations count, largest total first.
    var totalA = a.nodes + a.ways + a.relations;
    var totalB = b.nodes + b.ways + b.relations;

    if (totalA < totalB) return 1;
    if (totalA > totalB) return -1;
    return 0;
}
149 |
150 |
Osmium.Callbacks.init = function() {
    // Start-up hook: print a banner, store the start time in t0 and, when
    // OUTPUT_WAYS is set, open the line shapefile that the way callback
    // writes to.
    print('Running...');
    t0 = new Date();

    if (!OUTPUT_WAYS) return;

    shp = Osmium.Output.Shapefile.open(OUT_DIR + 'ways', 'line');

    // Attribute columns as [name, type, width], added in this order.
    var columns = [
        ['id', 'integer', 10],
        ['name', 'string', 40],
        ['version', 'integer', 5],
        ['timestamp', 'integer', 16]
    ];
    for (var c = 0; c < columns.length; c++) {
        shp.add_field(columns[c][0], columns[c][1], columns[c][2]);
    }
};
162 |
Osmium.Callbacks.node = function() {
    // Node hook: updates the per-user node count, the tag-category
    // counters, the age list and the running average node version.

    // The before_* callbacks are not called, so the first node seen
    // doubles as the "nodes are starting" signal.
    if (!doingnodes) {
        doingnodes = true;
        tnodes0 = new Date();
        print('parsing nodes...');
    }

    // Create the per-user record the first time a uid shows up.
    var uid = this.uid;
    if (!users[uid]) {
        users[uid] = new User(uid, this.user);
        usercnt += 1;
    }
    users[uid].nodes += 1;

    // Count every tag, plus tags whose key is listed in one of the
    // poikeys / transportkeys / namekeys lookup objects.
    var tags = this.tags;
    for (var key in tags) {
        nodetags += 1;
        if (key in poikeys) poicnt += 1;
        if (key in transportkeys) transportcnt += 1;
        if (key in namekeys) namecnt += 1;
    }

    nodecnt += 1;
    // Register the node with value 0; the way callback sets 1 for node
    // ids it references (presumably to tell used from unused nodes -
    // confirm against the end callback).
    nodes[this.id] = 0;
    // Timestamp in whole seconds since the epoch.
    ages.push(Math.round(new Date(this.timestamp).getTime() / 1000));
    // Incremental mean: no need to keep a sum of all versions.
    avgnodeversion += (this.version - avgnodeversion) / nodecnt;
};
187 |
Osmium.Callbacks.way = function() {
    /* todo:
     * average length of road segments
     * shapes for certain tiger features
     * turn restrictions
     */

    // Way hook: optionally writes the way to the shapefile, then updates
    // the per-user way count, node usage flags, TIGER statistics, the age
    // list and the running average way version.

    // Timestamp in whole seconds since the epoch; used for both the
    // shapefile attribute and the age list.
    var epoch = Math.round(new Date(this.timestamp).getTime() / 1000);

    if (OUTPUT_WAYS) {
        shp.add(this.geom, { id: this.id, name: this.tags.name, version: this.version, timestamp: epoch });
    }

    // The before_* callbacks are not called, so the first way seen
    // doubles as the "nodes are finished" signal.
    if (doingnodes) {
        doingnodes = false;
        doingways = true;
        tnodes1 = new Date();
        print('parsing ways...');
    }

    // Create the per-user record the first time a uid shows up.
    if (!users[this.uid]) {
        users[this.uid] = new User(this.uid, this.user);
        usercnt += 1;
    }
    users[this.uid].ways += 1;
    waycnt += 1;
    ages.push(epoch);

    // Flag every node this way references.
    for (var i = 0; i < this.nodes.length; i++) {
        nodes[this.nodes[i]] = 1;
    }

    // A way counts as TIGER-derived when ANY of its tag keys contains
    // "tiger". The flag only ever goes from false to true, so the result
    // does not depend on the order in which the tags are iterated.
    var tiger = false;
    for (var key in this.tags) {
        waytags += 1;
        if (/tiger/i.test(key)) tiger = true;
        if (/tiger:cfcc/i.test(key)) {
            // Tally ways per CFCC value.
            var cfcc = this.tags[key];
            tigerbreakdown[cfcc] = isNaN(tigerbreakdown[cfcc]) ? 1 : tigerbreakdown[cfcc] + 1;
        }
    }

    if (tiger) {
        tigerways++;
        if (this.version == 1) tiger_untouched++;
        // Incremental mean of (version - 1) over all TIGER ways.
        tigerversionincrease = tigerversionincrease + (this.version - 1 - tigerversionincrease) / tigerways;
    }
    // Incremental mean of the version over all ways.
    avgwayversion = avgwayversion + (this.version - avgwayversion) / waycnt;
};
231 |
Osmium.Callbacks.relation = function() {
    // Relation hook: updates the per-user relation count, the relation
    // type tally, the age list and the running average relation version.

    // The before_* callbacks are not called, so the first relation seen
    // doubles as the "ways are finished" signal.
    if (doingways) {
        doingways = false;
        doingrelations = true;
        tways1 = new Date();
        print('parsing relations...');
    }

    // Create the per-user record the first time a uid shows up.
    var uid = this.uid;
    if (!users[uid]) {
        users[uid] = new User(uid, this.user);
        usercnt += 1;
    }
    users[uid].relations += 1;
    relationcnt += 1;
    // Timestamp in whole seconds since the epoch.
    ages.push(Math.round(new Date(this.timestamp).getTime() / 1000));

    var tags = this.tags;
    for (var key in tags) {
        relationtags += 1;
        // NOTE(review): this matches any key that contains "type" in any
        // letter case, not only the exact key "type" - confirm intended.
        if (!key.match(/type/i)) continue;
        var kind = tags[key];
        relation_types[kind] = isNaN(relation_types[kind]) ? 1 : relation_types[kind] + 1;
    }

    // Incremental mean of the version over all relations.
    avgrelationversion += (this.version - avgrelationversion) / relationcnt;
};
256 |
257 | Osmium.Callbacks.end = function() {
258 | print('output and cleanup...');
259 |
260 | // CLEAN UP
261 | trelations1 = new Date();
262 | users.sort(sort_by_totals);
263 | if(OUTPUT_WAYS) shp.close();
264 |
265 | var out = Osmium.Output.CSV.open(OUT_DIR + 'userstats.csv');
266 | out.print('uid\tusername\tnodes\tways\trelations\tpercentile');
267 | var cumfeatures = 0;
268 | var grandtotal = nodecnt + waycnt + relationcnt;
269 | var realusercnt = 0;
270 | var user_thresholds = [0.9,0.95, 0.99];
271 | var user_threshold_met = 0;
272 | var users_for_threshold = [];
273 | var userperc_for_threshold = [];
274 |
275 | // WRITE USER STATS TO FILE
276 | for (var i=0;i user_thresholds[user_threshold_met]) {
282 | users_for_threshold.push(i+1);
283 | user_threshold_met +=1;
284 | }
285 | }
286 |
287 | for(var i=0;i