├── LICENSE
├── README.rst
├── geopandas_osm
    ├── __init__.py
    └── osm.py
└── setup.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Jacob Wasserman
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | geopandas_osm
 2 | =============
 3 | 
 4 | A GeoPandas interface for querying the OpenStreetMap Overpass API.
 5 |    
 6 | Quick start
 7 | -------------------------
 8 | 
 9 | Assuming you have a polygon for a boundary
10 | 
11 | .. code-block:: python
12 | 
13 |     import json
14 |     
15 |     import shapely.geometry
16 |     import geopandas_osm.osm
17 |     
18 |     with open('boundary.geojson') as f:
19 |         data = json.load(f)
20 |         
21 |     poly = shapely.geometry.shape(data['features'][0]['geometry'])
22 |     df = geopandas_osm.osm.query_osm('way', poly, recurse='down', tags='highway')
23 |     
24 |     roads = df[df.type == 'LineString'][['highway', 'name', 'geometry']]
25 |     
26 | 
27 | 
28 | Project skeleton based on http://github.com/mapbox/pyskel
29 | 


--------------------------------------------------------------------------------
/geopandas_osm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jwass/geopandas_osm/07e777467e244dfbf8692e7f71ce61bcdaec8682/geopandas_osm/__init__.py


--------------------------------------------------------------------------------
/geopandas_osm/osm.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | import xml.etree.ElementTree as ET
  3 | 
  4 | import fiona.crs
  5 | import geopandas as gpd
  6 | from pandas.io.common import urlopen, urlencode
  7 | import pandas as pd
  8 | from shapely.geometry import Point, LineString
  9 | from six import string_types
 10 | 
 11 | OSMData = collections.namedtuple('OSMData', ('nodes', 'waynodes', 'waytags',
 12 |                                              'relmembers', 'reltags'))
 13 | _crs = fiona.crs.from_epsg(4326)
 14 | 
 15 | # Tags to remove so we don't clobber the output. This list comes from
 16 | # osmtogeojson's index.js (https://github.com/tyrasd/osmtogeojson)
 17 | uninteresting_tags = set([
 18 |     "source",
 19 |     "source_ref",
 20 |     "source:ref",
 21 |     "history",
 22 |     "attribution",
 23 |     "created_by",
 24 |     "tiger:county",
 25 |     "tiger:tlid",
 26 |     "tiger:upload_uuid",
 27 | ])
 28 | 
 29 | 
 30 | # http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
 31 | def query_osm(typ, bbox=None, recurse=None, tags='', raw=False, 
 32 |               meta=False, **kwargs):
 33 |     """
 34 |     Query the Overpass API to obtain OpenStreetMap data.
 35 |     
 36 |     See also:
 37 |     http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
 38 | 
 39 |     The OSM XML data is parsed into an intermediate set of DataFrames.
 40 |     By passing in 'render=False', this will return these DataFrames stored
 41 |     as the OSMData namedtuple. If render is True, then the DataFrames
 42 |     are built into their corresponding geometries.
 43 | 
 44 |     Parameters
 45 |     ----------
 46 |     typ : {'node', 'way', 'relation'}
 47 |         The type of OSM data to query
 48 |     bbox : (min lon, min lat, max lon, max lat) bounding box
 49 |         Optional bounding box to restrict the query. Unless the query
 50 |         is extremely restricted, you usually want to specify this.
 51 |         It can be retrieved from GeoPandas objects as 'df.total_bounds' or
 52 |         from Shapely objects as 'geom.bounds'
 53 |     recurse : {'up, 'down', 'uprel', 'downrel'}
 54 |         This is used to get more data than the original query. If 'typ' is
 55 |         'way', you'll usually want this set to 'down' which grabs all nodes
 56 |         of the matching ways
 57 |     tags : string or list of query strings
 58 |         See also the OverpassQL (referenced above) for more tag options
 59 |         Examples:
 60 |             tags='highway' 
 61 |                 Matches objects with a 'highway' tag
 62 |             tags='highway=motorway' <-- Matches ob
 63 |                 Matches objects where the 'highway' tag is 'motorway'
 64 |             tags='name~[Mm]agazine'
 65 |                 Match if the 'name' tag matches the regular expression
 66 | 
 67 |             Specify a list of tag requests to match all of them
 68 |             tags=['highway', 'name~"^Magazine"']
 69 |                 Match tags that have 'highway' and where 'name' starts
 70 |                 with 'Magazine'
 71 | 
 72 |     raw : boolean, default False
 73 |         Return the raw XML data returned by the request
 74 |     render : boolean, default True
 75 |         Parse the output and return a final GeoDataFrame
 76 |     meta : boolean, default False
 77 |         Indicates whether to query the metadata with each OSM object. This
 78 |         includes the changeset, timestamp, uid, user, and version.
 79 |                 
 80 |     Returns
 81 |     -------
 82 |     df - GeoDataFrame
 83 |     Note that there's probably a bit more filtering required to get the
 84 |     exact desired data. For example if you only want ways, you may want
 85 |     to grab only the linestrings like:
 86 |         >>> df = df[df.type == 'LineString']
 87 | 
 88 |     """
 89 |     url = _build_url(typ, bbox, recurse, tags, meta)
 90 | 
 91 |     # TODO: Raise on non-200 (or 400-599)
 92 |     with urlopen(url) as response:
 93 |         content = response.read()
 94 | 
 95 |     if raw:
 96 |         return content
 97 |     return read_osm(content, **kwargs)
 98 | 
 99 | 
100 | def _build_url(typ, bbox=None, recurse=None, tags='', meta=False):
101 |     recurse_map = {
102 |         'up': '<',
103 |         'uprel': '<<',
104 |         'down': '>',
105 |         'downrel': '>>',
106 |     }
107 |     if recurse is None:
108 |         recursestr = ''
109 |     else:
110 |         try:
111 |             recursestr = recurse_map[recurse]
112 |         except KeyError:
113 |             raise ValueError("Unrecognized recurse value '{}'. "
114 |                              "Must be one of: {}."
115 |                              .format(recurse, ', '.join(recurse_map.keys())))
116 | 
117 |     # Allow tags to be a single string
118 |     if isinstance(tags, string_types) and tags:
119 |         tags = [tags]
120 |     queries = ''.join('[{}]'.format(t) for t in tags)
121 | 
122 |     # Overpass QL takes the bounding box as
123 |     # (min latitude, min longitude, max latitude, max longitude)
124 |     if bbox is None:
125 |         bboxstr = ''
126 |     else:
127 |         #bboxstr = "({})".format(
128 |             #','.join(str(b) for b in (bbox[1], bbox[0], bbox[3], bbox[2])))
129 |         bboxstr = '(poly:"{}")'.format(
130 |             ' '.join('{c[1]} {c[0]}'.format(c=c) for c in bbox.exterior.coords))
131 | 
132 |     if meta:
133 |         metastr = 'meta'
134 |     else:
135 |         metastr = ''
136 | 
137 |     query = '({typ}{bbox}{queries};{recurse};);out {meta};'.format(
138 |         typ=typ, bbox=bboxstr, queries=queries, recurse=recursestr, meta=metastr)
139 | 
140 |     url = ''.join(['http://www.overpass-api.de/api/interpreter?', 
141 |                    urlencode({'data': query})])
142 | 
143 |     return url
144 | 
145 | 
def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML into DataFrames and, unless told otherwise, render them.

    Parameters
    ----------
    content : string
        OSM XML document
    render : boolean, default True
        If True, pass the parsed data to 'render_to_gdf' and return its
        result. If False, return the parsed OSMData namedtuple as is.
    **kwargs
        Forwarded to 'render_to_gdf' when rendering

    """
    root = ET.fromstring(content)

    node_frame = read_nodes(root)
    way_nodes, way_tags = read_ways(root)
    rel_members, rel_tags = read_relations(root)

    parsed = OSMData(node_frame, way_nodes, way_tags, rel_members, rel_tags)
    if not render:
        return parsed

    return render_to_gdf(parsed, **kwargs)
164 | 
165 | 
def read_nodes(doc):
    """
    Collect every <node> child of 'doc' into a DataFrame.

    Each row holds the node's XML attributes plus one column per tag that
    is not listed in 'uninteresting_tags'. 'lon' and 'lat' are converted
    to floats. If the document has no nodes, an empty DataFrame with
    'id', 'lon' and 'lat' columns is returned.

    Example:
      <node id="1705717514" lat="42.3630798" lon="-71.0997601">
          <tag k="crossing" v="zebra"/>
          <tag k="highway" v="crossing"/>
          <tag k="source" v="Bing"/>
      </node>

    """
    records = [_element_to_dict(xmlnode) for xmlnode in doc.findall('node')]
    if records:
        nodes = _dict_to_dataframe(records)
    else:
        # A response without nodes (e.g. ways queried without recursing
        # down) would otherwise give a column-less frame and the 'lon'
        # lookup below would raise KeyError.
        nodes = pd.DataFrame(columns=['id', 'lon', 'lat'])
    nodes['lon'] = nodes['lon'].astype(float)
    nodes['lat'] = nodes['lat'].astype(float)

    return nodes
179 | 
180 | 
def _element_to_dict(element):
    """
    Flatten an XML element into a dict of its attributes and its tags.

    Starts from a copy of the element's attributes, then adds one entry
    per <tag k=... v=...> child, skipping keys in 'uninteresting_tags'.
    A tag whose key equals an attribute name replaces that attribute.
    """
    flattened = dict(element.attrib)
    for tag in element.findall('tag'):
        key = tag.attrib['k']
        if key in uninteresting_tags:
            continue
        flattened[key] = tag.attrib['v']

    return flattened
189 | 
190 | 
191 | def _dict_to_dataframe(d):
192 |     df = pd.DataFrame.from_dict(d)
193 |     if 'timestamp' in df:
194 |         df['timestamp'] = pd.to_datetime(df['timestamp'])
195 | 
196 |     return df
197 | 
198 | 
def read_ways(doc):
    """
    Collect every <way> child of 'doc' into two DataFrames.

    Returns (waynodes, waytags):
      waynodes - one row per <nd> child: its attributes (e.g. 'ref'), the
                 owning way's 'id' and the position 'index' within the way
      waytags  - one row per way: its attributes and interesting tags

    Example:
      <way id="8614593">
          <nd ref="61326730"/>
          <nd ref="61326036"/>
          <nd ref="61321194"/>
          <tag k="highway" v="residential"/>
          <tag k="lanes" v="2"/>
          <tag k="name" v="Centre Street"/>
          <tag k="source" v="massgis_import_v0.1_20071008165629"/>
      </way>

    """
    node_refs = []
    tag_rows = []
    for way in doc.findall('way'):
        way_id = way.attrib['id']
        for position, nd in enumerate(way.findall('nd')):
            row = dict(nd.attrib)
            row['id'] = way_id
            row['index'] = position
            node_refs.append(row)

        tag_rows.append(_element_to_dict(way))

    return _dict_to_dataframe(node_refs), _dict_to_dataframe(tag_rows)
232 | 
233 | 
def read_relations(doc):
    """
    Collect every <relation> child of 'doc' into two DataFrames.

    Returns (relmembers, reltags):
      relmembers - one row per <member> child: its attributes (e.g. 'type',
                   'ref', 'role'), the owning relation's 'id' and the
                   position 'index' within the relation
      reltags    - one row per relation: its attributes and interesting
                   tags

    Example:
      <relation id="1933745">
        <member type="way" ref="134055159" role="outer"/>
        <member type="way" ref="260533047" role="outer"/>
        <tag k="admin_level" v="8"/>
        <tag k="boundary" v="administrative"/>
        <tag k="name" v="Cambridge"/>
        <tag k="type" v="boundary"/>
      </relation>

    """
    member_rows = []
    tag_rows = []
    for relation in doc.findall('relation'):
        relation_id = relation.attrib['id']
        for position, member in enumerate(relation.findall('member')):
            row = dict(member.attrib)
            row['id'] = relation_id
            row['index'] = position
            member_rows.append(row)

        tag_rows.append(_element_to_dict(relation))

    return _dict_to_dataframe(member_rows), _dict_to_dataframe(tag_rows)
267 | 
268 | 
def render_to_gdf(osmdata, drop_untagged=True):
    """
    Render parsed OSM data into a single GeoDataFrame of nodes and ways.

    Parameters
    ----------
    osmdata : OSMData
        Only the 'nodes', 'waynodes' and 'waytags' fields are used; the
        relation fields are not rendered here.
    drop_untagged : boolean, default True
        Forwarded to 'render_nodes'

    Returns
    -------
    The rendered nodes, followed by the rendered ways when there are any.

    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)

    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is None:
        return nodes

    # pd.concat is the equivalent of the former nodes.append(ways);
    # DataFrame.append no longer exists in pandas 2.0 and later.
    combined = pd.concat([nodes, ways])
    return combined.set_geometry('geometry', crs=_crs)
278 | 
279 | 
def render_nodes(nodes, drop_untagged=True):
    """
    Turn the nodes DataFrame into point geometries.

    With 'drop_untagged', rows whose columns other than 'id', 'lon' and
    'lat' are all null are removed first. The 'lon'/'lat' columns are then
    replaced by a geometry of Points built from them.
    """
    if drop_untagged:
        # NOTE(review): every column besides id/lon/lat counts as a tag
        # here, so metadata columns (meta=True) would presumably keep all
        # nodes - confirm.
        tag_columns = nodes.columns.drop(['id', 'lon', 'lat'])
        nodes = nodes.dropna(subset=tag_columns, how='all')

    points = [Point(lon, lat) for lon, lat in zip(nodes['lon'], nodes['lat'])]
    without_coords = nodes.drop(['lon', 'lat'], axis=1)

    return without_coords.set_geometry(points, crs=_crs)
290 | 
291 | 
def render_ways(nodes, waynodes, waytags):
    """
    Build a LineString geometry for every way.

    Parameters
    ----------
    nodes : DataFrame
        Node table with 'id', 'lon' and 'lat' columns
    waynodes : DataFrame
        One row per way node with 'id' (way id), 'ref' (node id) and
        'index' (position within the way)
    waytags : DataFrame
        One row per way, keyed by 'id'

    Returns
    -------
    The way tags with a geometry column of LineStrings, or None when
    'waynodes' is None or empty.

    """
    if waynodes is None or waynodes.empty:
        return None

    node_points = nodes[['id', 'lon', 'lat']]

    def wayline(df):
        # Order the points by their position in the way. sort_values is
        # used because sort_index(by=...) was removed from pandas.
        df = df.sort_values(by='index')[['lon', 'lat']]
        return LineString(df.values)

    # Group the ways and create a LineString for each one.  way_lines is a
    # Series where the index is the way id and the value is the LineString.
    # Merge it with the waytags to get a single GeoDataFrame of ways
    waynodes = waynodes.merge(node_points, left_on='ref', right_on='id',
                              suffixes=('', '_nodes'))
    way_lines = waynodes.groupby('id').apply(wayline)
    ways = waytags.set_index('id').set_geometry(way_lines, crs=_crs)
    ways.reset_index(inplace=True)

    return ways
312 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from __future__ import unicode_literals
 2 | 
 3 | from codecs import open as codecs_open
 4 | from setuptools import setup, find_packages
 5 | 
 6 | 
 7 | # Get the long description from the relevant file
 8 | with codecs_open('README.rst', encoding='utf-8') as f:
 9 |     long_description = f.read()
10 | 
11 | 
12 | setup(name='geopandas_osm',
13 |       version='0.0.1',
14 |       description="Skeleton of a Python package",
15 |       long_description=long_description,
16 |       classifiers=[],
17 |       keywords='',
18 |       author="Jacob Wasserman",
19 |       author_email='jwasserman@gmail.com',
20 |       url='https://github.com/jwass/geopandas_osm',
21 |       license='MIT',
22 |       packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
23 |       include_package_data=True,
24 |       zip_safe=False,
25 |       install_requires=[
26 |           'six',
27 |           'geopandas',
28 |       ],
29 |       extras_require={
30 |           'test': ['pytest'],
31 |       },
32 |       entry_points="""
33 |       [console_scripts]
34 |       """
35 |       )
36 | 


--------------------------------------------------------------------------------