├── compare_osm_data.py
├── merge_osm_data.py
├── run.py
├── get_diff_stats.py
├── get_tm_osm_data.py
└── README.md
/compare_osm_data.py:
--------------------------------------------------------------------------------
1 | #!/bin/python
2 | # -*- coding: UTF-8 -*-
3 | # Author: B. Herfort, 2016, GIScience Heidelberg
4 | ###########################################
5 |
6 | import sys
7 | import os
8 |
def main(t1_osm, t2_osm, out_file):
    """Derive an OSM change file from two .osm files by calling osmosis.

    A single osmosis command line is assembled that reads and sorts both
    input files, applies ``--derive-change`` and writes the result with
    ``--write-xml-change``. The command is executed through ``os.system``;
    its exit status is not inspected.

    t1_osm   -- path of the first .osm file passed to osmosis
    t2_osm   -- path of the second .osm file passed to osmosis
    out_file -- path of the change (.osc) file to write
    """
    # The osmosis batch launcher is looked up below the current working
    # directory, in "osmosis-latest\bin".
    osmosis = '%s\\osmosis-latest\\bin\\osmosis.bat' % os.getcwd()
    print(osmosis)

    template = ('%s --read-xml file="%s" --sort'
                ' --read-xml file="%s" --sort'
                ' --derive-change --write-xml-change file="%s"')
    os.system(template % (osmosis, t1_osm, t2_osm, out_file))
18 |
19 |
if __name__ == "__main__":
    # Example run:
    #   $ python compare_osm_data.py time1.osm time2.osm diff_2.osc
    cli_args = sys.argv[1:]

    # Exactly three positional arguments are required.
    if len(cli_args) != 3:
        print("[ ERROR ] you must supply 3 arguments: time1.osm time2.osm out_file")
        sys.exit(1)

    main(*cli_args)
31 |
--------------------------------------------------------------------------------
/merge_osm_data.py:
--------------------------------------------------------------------------------
1 | #!/bin/python
2 | # -*- coding: UTF-8 -*-
3 | # Author: B. Herfort, 2016, GIScience Heidelberg
4 | ###########################################
5 |
6 | import sys
7 | import os
8 | import time
9 |
def get_all_files(directory, extension):
    """Return the names of all regular files in `directory` with `extension`.

    directory -- directory to scan (absolute or relative path)
    extension -- file extension to select, with or without the leading dot
                 (both 'osm' and '.osm' select 'tile.osm')

    Returns a sorted list of bare file names (no directory part).
    Sub-directories are skipped, and a file whose whole name merely equals
    the extension (e.g. a file called 'osm') is not selected.

    Side effect: the process working directory is changed to `directory`
    before returning. main() in this script calls os.getcwd() right after
    this function and derives the osmosis location from it, so the change
    of directory is kept on purpose.
    """
    wanted = '.' + extension.lstrip('.')

    selected_files = []
    for item in sorted(os.listdir(directory)):
        # Test against the full path so the check does not depend on the
        # current working directory.
        if (os.path.splitext(item)[1] == wanted
                and os.path.isfile(os.path.join(directory, item))):
            selected_files.append(item)

    # Change directory exactly once, after scanning. Doing it once per item
    # failed for a relative `directory` as soon as a second item was reached.
    os.chdir(directory)

    return selected_files
27 |
28 |
def main(directory, output_file, extension):
    """Merge all files with `extension` found in `directory` into one file.

    The first selected file is sorted and written to `output_file`. The
    remaining files are then merged into `output_file` with osmosis in
    batches: a batch is flushed when the file index is a multiple of 50 and
    after the last file. A summary with the elapsed time is printed at the
    end. Exit codes of the osmosis calls are not inspected.

    directory   -- directory holding the input files; it is joined to each
                   file name with a backslash (Windows paths)
    output_file -- path of the merged file to write
    extension   -- file extension handed to get_all_files(), e.g. 'osm'
    """
    start_time = time.time()

    selected_files = get_all_files(directory, extension)

    if not selected_files:
        print('[ WARNING ] no files with extension "' + extension
              + '" found in ' + directory)
    else:
        # get_all_files() changes the working directory to `directory`, so
        # osmosis is looked up next to the data directory (in its parent).
        # The path does not change during the run; build it once.
        cwd = os.getcwd()
        osmosis = os.path.dirname(cwd) + '\\osmosis-latest\\bin\\osmosis.bat'

        # Start the output file as a sorted copy of the first input.
        first_file = directory + '\\' + selected_files[0]
        os.system(osmosis + ' --read-xml file="' + first_file
                  + '" --sort --write-xml file="' + output_file + '"')

        pending = []
        last_index = len(selected_files) - 1
        for i in range(1, len(selected_files)):
            pending.append(directory + '\\' + selected_files[i])

            # Only flush when inputs are pending: a merge command without
            # additional inputs would just make osmosis read output_file and
            # overwrite it in the same call.
            if i % 50 == 0 or i == last_index:
                print(i)
                reads = ''.join('--read-xml file="' + name + '" --sort '
                                for name in pending)
                # One --merge per additional input joins all streams.
                merges = '--merge ' * len(pending)
                os.system(osmosis + ' --read-xml file="' + output_file
                          + '" --sort ' + reads + merges
                          + ' --write-xml file="' + output_file + '" -q')
                pending = []

    # Take end time and calculate program run time.
    run_time = time.time() - start_time

    print('############ END ######################################')
    print('##')
    print('## output file: ' + output_file)
    print('##')
    print('## runtime: ' + str(run_time) + ' s')
    print('##')
    print('## B. Herfort, GIScience Research Group')
    print('##')
    print('#######################################################')
77 |
if __name__ == "__main__":
    # Example run:
    #   $ python merge_osm_data.py D:/temp/osm_data merge.osm osm
    cli_args = sys.argv[1:]

    # Exactly three positional arguments are required.
    if len(cli_args) != 3:
        print("[ ERROR ] you must supply 3 arguments: osm_data_directory output_file file_extension")
        sys.exit(1)

    main(*cli_args)
89 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | #!/bin/python
2 | # -*- coding: UTF-8 -*-
3 | # Author: B. Herfort, 2016, GIScience Heidelberg
4 | ###########################################
5 |
6 | import os
7 | import sys
8 | import sched
9 | import time
10 |
# Run counter; not referenced by the functions in this script.
count = 0

# Timestamps of all runs started so far, oldest first. do_something() and
# do_something_else() append to it; the last two entries identify the merged
# files that are compared.
timestamp_list = []

# The task id is the only required command line argument. Fail with a
# readable message (like the other scripts do) instead of an IndexError.
if len(sys.argv) < 2:
    print("[ ERROR ] you must supply 1 argument: task_id")
    sys.exit(1)

# Task id that is passed on to get_tm_osm_data.py.
task_id = sys.argv[1]
14 |
def do_something_else(sc):
    """Run one follow-up cycle: download, merge, compare, update statistics.

    A new timestamp is appended to the module-level ``timestamp_list``, a data
    directory for it is created, and the helper scripts are started one after
    another through ``os.system``. Finally the function re-schedules itself
    on `sc` with a delay of 900 seconds.

    sc -- scheduler object providing ``enter(delay, priority, action, args)``
    """
    print("start next run...")

    # Timestamp made of year, month, day, hour and minute of the local time.
    now = time.localtime()
    timestamp = '_'.join(str(part) for part in now[:5])
    timestamp_list.append(timestamp)
    print('timestamp: %s' % timestamp)

    cwd = os.getcwd()

    def data_dir(stamp):
        # Directory holding the downloaded tiles of one run.
        return cwd + '\\osm_data' + '_' + stamp

    def merged_file(stamp):
        # Merged .osm file of one run, stored inside its data directory.
        return data_dir(stamp) + '\\merge_' + stamp + '.osm'

    # Create osm data directory.
    osm_data_dir = data_dir(timestamp)
    os.mkdir(osm_data_dir)

    # Get osm data for tasking manager project.
    print('Download OSM data from OpenStreetMap API. This may take some time...')
    os.system('python get_tm_osm_data.py ' + task_id + ' ' + osm_data_dir)

    # Merge osm data.
    print('Merge .osm files')
    output_file = merged_file(timestamp)
    os.system('python merge_osm_data.py ' + osm_data_dir + ' ' + output_file + ' osm')

    # Compare the merged files of the two most recent runs. The indices are
    # computed from the list length exactly as before.
    print('Compare two .osm files and derive change')
    run_count = len(timestamp_list)
    new_osm = merged_file(timestamp_list[run_count - 1])
    old_osm = merged_file(timestamp_list[run_count - 2])
    out_file = cwd + '\\output\\diff_' + timestamp + '.osc'
    os.system('python compare_osm_data.py ' + new_osm + ' ' + old_osm + ' ' + out_file)

    # Calculate diff statistics.
    out_dir = cwd + '\\output'
    print(out_file)
    os.system('python get_diff_stats.py ' + out_file + ' ' + out_dir)

    # Schedule the next cycle.
    sc.enter(900, 1, do_something_else, (sc,))
61 |
62 |
def do_something(sc):
    """Run the first cycle: download and merge the data, start the stats file.

    A timestamp is appended to the module-level ``timestamp_list``, the
    "output" directory is created if missing, the download and merge scripts
    are started through ``os.system`` and ``diff_stats.txt`` is (re)created
    in the output directory with only the header line. Afterwards
    ``do_something_else`` is scheduled on `sc` with a delay of 900 seconds.

    sc -- scheduler object providing ``enter(delay, priority, action, args)``
    """
    print("start first run...")

    # Timestamp made of year, month, day, hour and minute of the local time.
    lt = time.localtime()
    timestamp = '_'.join(str(part) for part in lt[:5])
    timestamp_list.append(timestamp)

    print('timestamp: %s' % timestamp)

    cwd = os.getcwd()
    osm_data_dir = cwd + '\\osm_data' + '_' + timestamp
    out_dir = cwd + '\\output'

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # NOTE(review): osm_data_dir is not created here, while
    # do_something_else() creates its directory before downloading.
    # Presumably get_tm_osm_data.py creates it when missing -- confirm.

    # Get osm data for tasking manager project.
    print('Download OSM data from OpenStreetMap API. This may take some minutes...')
    cmd = 'python get_tm_osm_data.py ' + task_id + ' ' + osm_data_dir
    os.system(cmd)

    # Merge osm data.
    print('Merge .osm files')
    output_file = osm_data_dir + '\\merge_' + timestamp + '.osm'
    cmd = 'python merge_osm_data.py ' + osm_data_dir + ' ' + output_file + ' osm'
    os.system(cmd)

    # Start a fresh statistics file containing only the header line. The
    # context manager closes the file even if the write fails.
    header = 'diff_file;nodes_created;nodes_modified;nodes_deleted;ways_created;ways_modified;ways_deleted;buildings_created;buildings_modified;buildings_deleted\n'
    outputAsc = out_dir + '\\diff_stats.txt'
    with open(outputAsc, "w") as fileout:
        fileout.write(header)

    # Hand over to the recurring cycle.
    sc.enter(900, 1, do_something_else, (sc,))
107 |
108 |
109 |
# Wall-clock based scheduler; time.sleep is used to wait between events.
s = sched.scheduler(time.time, time.sleep)

# Queue the first run one second from now. The scheduler itself is passed
# along so that the run can enqueue its successor.
first_run_args = (s,)
s.enter(1, 1, do_something, first_run_args)

# Blocks while events are queued.
s.run()
113 |
--------------------------------------------------------------------------------
/get_diff_stats.py:
--------------------------------------------------------------------------------
1 | #!/bin/python
2 | # -*- coding: UTF-8 -*-
3 | # Author: B. Herfort, 2016, GIScience Heidelberg
4 | ###########################################
5 |
6 | import sys
7 | import os
8 |
class nodes:
    """Counters for created, modified and deleted nodes."""

    def __init__(self, x=0, y=0, z=0):
        # x, y and z are the initial created / modified / deleted counts.
        self.created, self.modified, self.deleted = x, y, z
14 |
class ways:
    """Counters for created, modified and deleted ways."""

    def __init__(self, x=0, y=0, z=0):
        # x, y and z are the initial created / modified / deleted counts.
        self.created, self.modified, self.deleted = x, y, z
20 |
class buildings:
    """Counters for created, modified and deleted buildings."""

    def __init__(self, x=0, y=0, z=0):
        # x, y and z are the initial created / modified / deleted counts.
        self.created, self.modified, self.deleted = x, y, z
26 |
27 |
28 |
29 | def main(diff_file, out_dir):
30 |
31 |
32 | Nodes = nodes()
33 | Ways = ways()
34 | Buildings = buildings()
35 |
36 | #read osm diff data
37 | filein = file(diff_file, "r")
38 |
39 | lines = filein.readlines()
40 |
41 | for i in range(0,len(lines)):
42 | #find all new nodes and ways
43 | if lines[i].startswith(' '):
44 | for j in range(0,len(lines)-i):
45 | if lines[i+j].startswith(' '):
46 | break
47 | if lines[i+j].startswith(' '):
53 | break
54 | if lines[i+j+k].startswith(' '):
60 | for j in range(0,len(lines)-i):
61 | if lines[i+j].startswith(' '):
62 | break
63 | if lines[i+j].startswith(' '):
69 | break
70 | if lines[i+j+k].startswith(' '):
75 | for j in range(0,len(lines)-i):
76 | if lines[i+j].startswith(' '):
77 | break
78 | if lines[i+j].startswith(' '):
84 | break
85 | if lines[i+j+k].startswith(' these files will be stored in a separate folder for each timestamp
22 | - Changes between different timestamps, .osc files --> these files will be stored in the "output" folder
23 | - statistics file with information on deleted, modified and created nodes and ways per time interval, .txt file --> this file will be stored in the "output" folder
24 | - statistics figures --> not implemented yet
25 |
26 | Workflow:
27 |
28 | 1. First run:
29 | - download OSM data for timestep 1 --> uses get_tm_osm_data.py
30 | - merge single files for timestep 1 --> uses merge_osm_data.py
31 | - wait 15 minutes
32 |
33 | 2. Second run and further runs:
34 | - download OSM data for timestep 2 (3,...) --> uses get_tm_osm_data.py
35 | - merge single files for timestep 2 (3,...) --> uses merge_osm_data.py
36 | - compare timestep 1 and timestep 2 (or timestep 2 and timestep 3,...) --> uses compare_osm_data.py
37 | - calculate statistics and add to statistics file --> uses get_diff_stats.py
38 | - wait 15 minutes, then start again with 2.1
39 |
40 | Issues:
41 | - you may change the waiting time (in seconds, currently 900) at lines 60 and 106 of run.py (the two `sc.enter(900, ...)` calls)
42 |
43 | ## Description for individual scripts
44 |
45 | ### get_tm_osm_data.py:
46 | - example run: python get_tm_osm_data.py 1251 D:/temp/osm_data
47 |
48 | Input arguments:
49 | - HOT Task id (e.g. '1251') Have a look at the Tasking Manager (http://tasks.hotosm.org/) for ongoing projects.
50 | - output directory
51 |
52 | Output:
53 | - many .osm files within the output directory
54 |
55 | Workflow:
56 |
57 | 1. downloads information about the Tasking Manager project
58 | 2. iterates over each tile in the Tasking Manager project
59 | 3. downloads all OSM data via the OpenStreetMap API given the bounding box for each tile
60 |
61 | Issues:
62 | - download from OpenStreetMap API may fail if the bounding box is too big
63 | - download may take some time depending on the amount of data in OSM
64 | - you can think about use cases where you are only interested in specific objects or object types: the overpass api is better suited for those scenarios
65 |
66 | ### merge_osm_data.py:
67 | - example run: 'python merge_osm_data.py D:/temp/osm_data merge.osm osm'
68 |
69 | Input arguments:
70 | - input directory, e.g. D:/temp/osm_data
71 | - output file name, e.g. merge.osm
72 | - file extension without the leading dot, as in the example run, e.g. osm for OSM files, osc for OSM change files
73 |
74 | Output:
75 | - merged OSM data into one file
76 |
77 | Issues:
78 | - uses the osmosis tool --> you need to have this tool in the same directory or point to the correct directory within the script at lines 45 and 56 (the `osmosis = ...` assignments)
79 |
80 | ### compare_osm_data.py:
81 | - example run: 'python compare_osm_data.py time1.osm time2.osm change.osc'
82 |
83 | Input arguments:
84 | - .osm file for first timestamp, e.g. time1.osm
85 | - .osm file for second timestamp, the "new" one, e.g. time2.osm
86 | - output file name with .osc extension --> OSM change files have the extension .osc
87 |
88 | Output:
89 | - difference between the two timestamps in .osc format --> this file can be easily visualized using JOSM (https://josm.openstreetmap.de/)
90 | - contains information on created, modified and deleted OSM objects
91 |
92 | Issues:
93 | - uses the osmosis tool --> you need to have this tool in the same directory or point to the correct directory within the script at line 12
94 | - time1.osm and time2.osm should cover the same area, otherwise this analysis is quite meaningless
95 |
96 | ### get_diff_stats.py:
97 | - example run: 'python get_diff_stats.py diff.osc D:/temp/osm_data'
98 |
99 | Input arguments:
100 | - .osc file that contains information on changes between two timestamps
101 | - output directory, e.g. D:/temp/osm_data
102 |
103 | Output:
104 | - .txt file with information on number of created, modified and deleted OSM objects
105 | - header: diff_file;nodes_created;nodes_modified;nodes_deleted;ways_created;ways_modified;ways_deleted;buildings_created;buildings_modified;buildings_deleted
106 | - delimiter ';'
107 |
108 |
109 |
110 |
111 |
112 |
113 |
--------------------------------------------------------------------------------