├── LICENSE
├── README.rst
├── geopandas_osm
│   ├── __init__.py
│   └── osm.py
└── setup.py
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 Jacob Wasserman
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | geopandas_osm
2 | =============
3 |
4 | A GeoPandas interface for querying the OpenStreetMap Overpass API.
5 |
6 | Quick start
7 | -------------------------
8 |
9 | Assuming you have a polygon for a boundary:
10 |
11 | .. code-block:: python
12 |
13 |     import json
14 |
15 |     import shapely.geometry
16 |     import geopandas_osm.osm
17 |
18 |     with open('boundary.geojson') as f:
19 |         data = json.load(f)
20 |
21 |     poly = shapely.geometry.shape(data['features'][0]['geometry'])
22 |     df = geopandas_osm.osm.query_osm('way', poly, recurse='down', tags='highway')
23 |
24 |     roads = df[df.type == 'LineString'][['highway', 'name', 'geometry']]
25 |
26 |
27 |
28 | Project skeleton based on http://github.com/mapbox/pyskel
29 |
--------------------------------------------------------------------------------
/geopandas_osm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jwass/geopandas_osm/07e777467e244dfbf8692e7f71ce61bcdaec8682/geopandas_osm/__init__.py
--------------------------------------------------------------------------------
/geopandas_osm/osm.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import xml.etree.ElementTree as ET
3 |
4 | import fiona.crs
5 | import geopandas as gpd
6 | from pandas.io.common import urlopen, urlencode
7 | import pandas as pd
8 | from shapely.geometry import Point, LineString
9 | from six import string_types
10 |
# Bundle of the intermediate DataFrames produced while parsing an OSM XML
# document (see read_osm).
OSMData = collections.namedtuple(
    'OSMData',
    ['nodes', 'waynodes', 'waytags', 'relmembers', 'reltags'],
)

# CRS attached to every geometry column built by the render_* functions.
_crs = fiona.crs.from_epsg(4326)
14 |
# Tag keys that are skipped while parsing so they don't clutter the output.
# This list comes from osmtogeojson's index.js
# (https://github.com/tyrasd/osmtogeojson)
uninteresting_tags = {
    "source",
    "source_ref",
    "source:ref",
    "history",
    "attribution",
    "created_by",
    "tiger:county",
    "tiger:tlid",
    "tiger:upload_uuid",
}
28 |
29 |
# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
def query_osm(typ, bbox=None, recurse=None, tags='', raw=False,
              meta=False, **kwargs):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    Unless 'raw' is True, the OSM XML response is passed to 'read_osm',
    which parses it into an intermediate set of DataFrames. By passing in
    'render=False', these DataFrames are returned stored as the OSMData
    namedtuple. If render is True (the default), the DataFrames are built
    into their corresponding geometries.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : polygon, optional
        Boundary used to restrict the query. Unless the query is extremely
        restricted, you usually want to specify this. The URL builder
        reads 'bbox.exterior.coords' as (lon, lat) pairs, so pass a polygon
        such as a 'shapely.geometry.Polygon'.
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'

    raw : boolean, default False
        Return the response body exactly as read from the request instead
        of parsing it
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Forwarded to 'read_osm' when 'raw' is False. This is how 'render'
        (boolean, default True: parse the output and return a final
        GeoDataFrame) and 'drop_untagged' are supplied.

    Returns
    -------
    df - GeoDataFrame
        Returned when 'raw' is False and rendering is enabled. With
        'render=False' the OSMData namedtuple is returned instead, and
        with 'raw=True' the unparsed response content.

        Note that there's probably a bit more filtering required to get the
        exact desired data. For example if you only want ways, you may want
        to grab only the linestrings like:
        >>> df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)

    # TODO: Raise on non-200 (or 400-599)
    with urlopen(url) as response:
        content = response.read()

    # Hand back the untouched payload when asked, otherwise parse it.
    return content if raw else read_osm(content, **kwargs)
98 |
99 |
100 | def _build_url(typ, bbox=None, recurse=None, tags='', meta=False):
101 | recurse_map = {
102 | 'up': '<',
103 | 'uprel': '<<',
104 | 'down': '>',
105 | 'downrel': '>>',
106 | }
107 | if recurse is None:
108 | recursestr = ''
109 | else:
110 | try:
111 | recursestr = recurse_map[recurse]
112 | except KeyError:
113 | raise ValueError("Unrecognized recurse value '{}'. "
114 | "Must be one of: {}."
115 | .format(recurse, ', '.join(recurse_map.keys())))
116 |
117 | # Allow tags to be a single string
118 | if isinstance(tags, string_types) and tags:
119 | tags = [tags]
120 | queries = ''.join('[{}]'.format(t) for t in tags)
121 |
122 | # Overpass QL takes the bounding box as
123 | # (min latitude, min longitude, max latitude, max longitude)
124 | if bbox is None:
125 | bboxstr = ''
126 | else:
127 | #bboxstr = "({})".format(
128 | #','.join(str(b) for b in (bbox[1], bbox[0], bbox[3], bbox[2])))
129 | bboxstr = '(poly:"{}")'.format(
130 | ' '.join('{c[1]} {c[0]}'.format(c=c) for c in bbox.exterior.coords))
131 |
132 | if meta:
133 | metastr = 'meta'
134 | else:
135 | metastr = ''
136 |
137 | query = '({typ}{bbox}{queries};{recurse};);out {meta};'.format(
138 | typ=typ, bbox=bboxstr, queries=queries, recurse=recursestr, meta=metastr)
139 |
140 | url = ''.join(['http://www.overpass-api.de/api/interpreter?',
141 | urlencode({'data': query})])
142 |
143 | return url
144 |
145 |
def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML data into DataFrames and optionally "render" them.

    'content' is parsed with ElementTree and its nodes, ways and relations
    are read into an OSMData namedtuple. When 'render' is true, that tuple
    is handed to 'render_to_gdf' together with any extra keyword arguments
    and the rendered result is returned; otherwise the namedtuple itself is
    returned and the extra keyword arguments are not used.

    """
    root = ET.fromstring(content)

    node_frame = read_nodes(root)
    way_nodes, way_tags = read_ways(root)
    rel_members, rel_tags = read_relations(root)
    parsed = OSMData(node_frame, way_nodes, way_tags, rel_members, rel_tags)

    if not render:
        return parsed
    return render_to_gdf(parsed, **kwargs)
164 |
165 |
def read_nodes(doc):
    """
    Read the 'node' children of 'doc' into a DataFrame.

    Each row holds the node's XML attributes plus its interesting tags;
    'lon' and 'lat' are converted to float. If 'doc' contains no nodes an
    empty frame with 'id', 'lon' and 'lat' columns is returned, so callers
    can rely on those columns being present.
    """
    # Illustrative shape of the elements being read:
    #
    #   <node id="..." lat="..." lon="...">
    #     <tag k="..." v="..."/>
    #   </node>
    records = [_element_to_dict(xmlnode) for xmlnode in doc.findall('node')]
    if records:
        nodes = _dict_to_dataframe(records)
    else:
        # A response without nodes (e.g. ways queried without recursion)
        # used to fail below with KeyError: 'lon'.
        nodes = pd.DataFrame(columns=['id', 'lon', 'lat'])
    nodes['lon'] = nodes['lon'].astype(float)
    nodes['lat'] = nodes['lat'].astype(float)

    return nodes
179 |
180 |
def _element_to_dict(element):
    """
    Flatten an XML element into a dict.

    The result starts as a copy of the element's attributes and then gains
    one entry per 'tag' child (key from 'k', value from 'v'), skipping any
    key listed in 'uninteresting_tags'.
    """
    record = element.attrib.copy()
    record.update(
        (tag.attrib['k'], tag.attrib['v'])
        for tag in element.findall('tag')
        if tag.attrib['k'] not in uninteresting_tags
    )
    return record
189 |
190 |
191 | def _dict_to_dataframe(d):
192 | df = pd.DataFrame.from_dict(d)
193 | if 'timestamp' in df:
194 | df['timestamp'] = pd.to_datetime(df['timestamp'])
195 |
196 | return df
197 |
198 |
def read_ways(doc):
    """
    Read the 'way' children of 'doc' into two DataFrames.

    Returns a '(waynodes, waytags)' pair. 'waynodes' has one row per 'nd'
    child: its attributes plus the owning way's 'id' and the child's
    position as 'index'. 'waytags' has one row per way with its attributes
    and interesting tags.
    """
    # Illustrative shape of the elements being read:
    #
    #   <way id="...">
    #     <nd ref="..."/>
    #     <nd ref="..."/>
    #     <tag k="..." v="..."/>
    #   </way>
    node_rows = []
    tag_rows = []
    for way in doc.findall('way'):
        way_id = way.attrib['id']
        # Keep the position of each node reference so the way's point
        # order can be restored later.
        node_rows.extend(
            dict(nd.attrib, id=way_id, index=position)
            for position, nd in enumerate(way.findall('nd'))
        )
        tag_rows.append(_element_to_dict(way))

    return _dict_to_dataframe(node_rows), _dict_to_dataframe(tag_rows)
232 |
233 |
def read_relations(doc):
    """
    Read the 'relation' children of 'doc' into two DataFrames.

    Returns a '(relmembers, reltags)' pair. 'relmembers' has one row per
    'member' child: its attributes plus the owning relation's 'id' and the
    child's position as 'index'. 'reltags' has one row per relation with
    its attributes and interesting tags.
    """
    # Illustrative shape of the elements being read:
    #
    #   <relation id="...">
    #     <member ... />
    #     <member ... />
    #     <tag k="..." v="..."/>
    #   </relation>
    member_rows = []
    tag_rows = []
    for relation in doc.findall('relation'):
        relation_id = relation.attrib['id']
        for position, member in enumerate(relation.findall('member')):
            member_rows.append(
                dict(member.attrib, id=relation_id, index=position))
        tag_rows.append(_element_to_dict(relation))

    return _dict_to_dataframe(member_rows), _dict_to_dataframe(tag_rows)
267 |
268 |
def render_to_gdf(osmdata, drop_untagged=True):
    """
    Render parsed OSM data into a single frame of geometries.

    Parameters
    ----------
    osmdata : OSMData
        Only its 'nodes', 'waynodes' and 'waytags' fields are used here.
    drop_untagged : boolean, default True
        Passed to 'render_nodes'.

    Returns
    -------
    The rendered nodes, followed by the rendered ways when
    'render_ways' produces any.
    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)

    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is None:
        return nodes

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames the same way (original row labels are kept). The geometry
    # column is re-declared afterwards, as the original code did.
    combined = pd.concat([nodes, ways])
    return combined.set_geometry('geometry', crs=_crs)
278 |
279 |
def render_nodes(nodes, drop_untagged=True):
    """
    Convert the node table's lon/lat columns into point geometries.

    When 'drop_untagged' is true, rows whose every column other than 'id',
    'lon' and 'lat' is missing (i.e. nodes without tags) are dropped first.
    The 'lon' and 'lat' columns are removed from the result and replaced by
    a geometry of Points.
    """
    if drop_untagged:
        tag_columns = nodes.columns.drop(['id', 'lon', 'lat'])
        nodes = nodes.dropna(subset=tag_columns, how='all')

    points = [Point(lon, lat)
              for lon, lat in zip(nodes['lon'], nodes['lat'])]
    without_coords = nodes.drop(['lon', 'lat'], axis=1)

    return without_coords.set_geometry(points, crs=_crs)
290 |
291 |
def render_ways(nodes, waynodes, waytags):
    """
    Build one LineString per way and attach it to the way tags.

    Parameters
    ----------
    nodes : DataFrame
        Node table with 'id', 'lon' and 'lat' columns.
    waynodes : DataFrame or None
        Way-to-node references with 'id' (way id), 'ref' (node id) and
        'index' (position of the node within the way).
    waytags : DataFrame
        One row per way, including an 'id' column.

    Returns
    -------
    The way tags with a LineString geometry per way, or None when
    'waynodes' is None or empty.
    """
    if waynodes is None or waynodes.empty:
        return None

    node_points = nodes[['id', 'lon', 'lat']]

    def wayline(df):
        # sort_index(by=...) was removed from pandas; sort_values is its
        # replacement and restores the node order within the way.
        df = df.sort_values('index')[['lon', 'lat']]
        return LineString(df.values)

    # Group the ways and create a LineString for each one. way_lines is a
    # Series where the index is the way id and the value is the LineString.
    # Merge it with the waytags to get a single GeoDataFrame of ways
    waynodes = waynodes.merge(node_points, left_on='ref', right_on='id',
                              suffixes=('', '_nodes'))
    way_lines = waynodes.groupby('id').apply(wayline)
    ways = waytags.set_index('id').set_geometry(way_lines, crs=_crs)
    ways.reset_index(inplace=True)

    return ways
312 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from __future__ import unicode_literals

from codecs import open as codecs_open
from setuptools import setup, find_packages


# Get the long description from the relevant file
with codecs_open('README.rst', encoding='utf-8') as f:
    long_description = f.read()


setup(name='geopandas_osm',
      version='0.0.1',
      # Replaces the unmodified project-skeleton placeholder text with the
      # summary used in the README.
      description="A GeoPandas interface to query OpenStreetMap Overpass API",
      long_description=long_description,
      classifiers=[],
      keywords='',
      author="Jacob Wasserman",
      author_email='jwasserman@gmail.com',
      url='https://github.com/jwass/geopandas_osm',
      license='MIT',
      packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
      include_package_data=True,
      zip_safe=False,
      # geopandas_osm/osm.py imports fiona, pandas and shapely directly, so
      # they are declared here rather than relied on as transitive
      # dependencies of geopandas.
      install_requires=[
          'six',
          'geopandas',
          'fiona',
          'pandas',
          'shapely',
      ],
      extras_require={
          'test': ['pytest'],
      },
      entry_points="""
      [console_scripts]
      """
      )
36 |
--------------------------------------------------------------------------------