├── LICENSE ├── README.rst ├── geopandas_osm ├── __init__.py └── osm.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Jacob Wasserman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | geopandas_osm 2 | ============= 3 | 4 | A GeoPandas interface to query the OpenStreetMap Overpass API 5 | 6 | Quick start 7 | ------------------------- 8 | 9 | Assuming you have a polygon for a boundary 10 | 11 | ..
code-block:: python 12 | 13 | import json 14 | 15 | import shapely.geometry 16 | import geopandas_osm.osm 17 | 18 | with open('boundary.geojson') as f: 19 | data = json.load(f) 20 | 21 | poly = shapely.geometry.shape(data['features'][0]['geometry']) 22 | df = geopandas_osm.osm.query_osm('way', poly, recurse='down', tags='highway') 23 | 24 | roads = df[df.type == 'LineString'][['highway', 'name', 'geometry']] 25 | 26 | 27 | 28 | Project skeleton based on http://github.com/mapbox/pyskel 29 | -------------------------------------------------------------------------------- /geopandas_osm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwass/geopandas_osm/07e777467e244dfbf8692e7f71ce61bcdaec8682/geopandas_osm/__init__.py -------------------------------------------------------------------------------- /geopandas_osm/osm.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import xml.etree.ElementTree as ET 3 | 4 | import fiona.crs 5 | import geopandas as gpd 6 | from pandas.io.common import urlopen, urlencode 7 | import pandas as pd 8 | from shapely.geometry import Point, LineString 9 | from six import string_types 10 | 11 | OSMData = collections.namedtuple('OSMData', ('nodes', 'waynodes', 'waytags', 12 | 'relmembers', 'reltags')) 13 | _crs = fiona.crs.from_epsg(4326) 14 | 15 | # Tags to remove so we don't clobber the output. 
# This list comes from osmtogeojson's index.js
# (https://github.com/tyrasd/osmtogeojson)
uninteresting_tags = {
    "source",
    "source_ref",
    "source:ref",
    "history",
    "attribution",
    "created_by",
    "tiger:county",
    "tiger:tlid",
    "tiger:upload_uuid",
}


# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
def query_osm(typ, bbox=None, recurse=None, tags='', raw=False,
              meta=False, **kwargs):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    The OSM XML data is parsed into an intermediate set of DataFrames.
    By passing in 'render=False', this will return these DataFrames stored
    as the OSMData namedtuple. If render is True, then the DataFrames
    are built into their corresponding geometries.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : (min lon, min lat, max lon, max lat) bounding box, or polygon
        Optional area to restrict the query. Unless the query is extremely
        restricted, you usually want to specify this.
        A bounding box can be retrieved from GeoPandas objects as
        'df.total_bounds' or from Shapely objects as 'geom.bounds'.
        An object exposing 'exterior.coords' (such as a Shapely Polygon)
        restricts the query to the inside of that ring instead.
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'

    raw : boolean, default False
        Return the raw XML data returned by the request
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Forwarded to read_osm:
        render : boolean, default True
            Parse the output and return a final GeoDataFrame
        drop_untagged : boolean, default True
            Drop nodes that carry no tags before rendering

    Returns
    -------
    The raw response body if 'raw' is true, an OSMData namedtuple if
    'render=False' was passed, otherwise:

    df - GeoDataFrame
    Note that there's probably a bit more filtering required to get the
    exact desired data. For example if you only want ways, you may want
    to grab only the linestrings like:
    >>> df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)

    # NOTE(review): urlopen/urlencode are imported at the top of this module
    # from pandas.io.common, which looks like a pandas-internal module;
    # confirm it still provides both names for the pandas version in use.
    # TODO: Raise on non-200 (or 400-599)
    with urlopen(url) as response:
        content = response.read()

    if raw:
        return content
    return read_osm(content, **kwargs)


def _build_url(typ, bbox=None, recurse=None, tags='', meta=False):
    """
    Build the Overpass API interpreter URL for a query.

    Parameters mirror query_osm. 'bbox' may be None, a
    (min lon, min lat, max lon, max lat) sequence, or an object with an
    'exterior.coords' ring of (lon, lat) pairs.

    Raises
    ------
    ValueError
        If 'recurse' is not None and not one of the recognized values.
    """
    recurse_map = {
        'up': '<',
        'uprel': '<<',
        'down': '>',
        'downrel': '>>',
    }
    if recurse is None:
        recursestr = ''
    else:
        try:
            recursestr = recurse_map[recurse]
        except KeyError:
            raise ValueError("Unrecognized recurse value '{}'. "
                             "Must be one of: {}."
                             .format(recurse, ', '.join(recurse_map.keys())))

    # Allow tags to be a single string; None/'' means "no tag filters"
    if not tags:
        tags = []
    elif isinstance(tags, string_types):
        tags = [tags]
    queries = ''.join('[{}]'.format(t) for t in tags)

    if bbox is None:
        bboxstr = ''
    elif hasattr(bbox, 'exterior'):
        # Polygon filter: a quoted, space separated list of "lat lon" pairs,
        # so each (lon, lat) coordinate is swapped.
        bboxstr = '(poly:"{}")'.format(
            ' '.join('{c[1]} {c[0]}'.format(c=c)
                     for c in bbox.exterior.coords))
    else:
        # Overpass QL takes the bounding box as
        # (min latitude, min longitude, max latitude, max longitude)
        min_lon, min_lat, max_lon, max_lat = bbox
        bboxstr = '({})'.format(
            ','.join(str(b) for b in (min_lat, min_lon, max_lat, max_lon)))

    metastr = 'meta' if meta else ''

    query = '({typ}{bbox}{queries};{recurse};);out {meta};'.format(
        typ=typ, bbox=bboxstr, queries=queries, recurse=recursestr,
        meta=metastr)

    url = ''.join(['http://www.overpass-api.de/api/interpreter?',
                   urlencode({'data': query})])

    return url


def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML data and store as several DataFrames. Optionally "render"
    the DataFrames to GeoDataFrames.

    Parameters
    ----------
    content : string or bytes
        OSM XML document
    render : boolean, default True
        If true, return the result of render_to_gdf; otherwise return the
        intermediate OSMData namedtuple
    **kwargs
        Forwarded to render_to_gdf (only used when render is true)
    """
    doc = ET.fromstring(content)

    nodes = read_nodes(doc)
    waynodes, waytags = read_ways(doc)
    relmembers, reltags = read_relations(doc)

    data = OSMData(nodes, waynodes, waytags, relmembers, reltags)

    if render:
        data = render_to_gdf(data, **kwargs)

    return data


def read_nodes(doc):
    """
    Read all <node> elements of the document into a DataFrame.

    Each row holds the node's XML attributes plus one column per
    interesting tag; 'lon' and 'lat' are converted to floats. A document
    without nodes yields an empty frame that still has the 'id', 'lon' and
    'lat' columns.
    """
    # Example:
    #   <node id="1" lat="42.5" lon="-71.25">
    #     <tag k="name" v="Some place"/>
    #   </node>
    nodes = [_element_to_dict(xmlnode) for xmlnode in doc.findall('node')]
    nodes = _dict_to_dataframe(nodes)
    if nodes.empty:
        # Without any node there are no columns at all; provide the ones
        # the rendering functions index so they don't fail with a KeyError.
        nodes = pd.DataFrame(columns=['id', 'lon', 'lat'])
    nodes['lon'] = nodes['lon'].astype(float)
    nodes['lat'] = nodes['lat'].astype(float)

    return nodes


def _element_to_dict(element):
    """
    Return the element's attributes merged with its <tag k=... v=...>
    children, skipping keys listed in uninteresting_tags.
    """
    d = element.attrib.copy()
    for t in element.findall('tag'):
        k = t.attrib['k']
        if k not in uninteresting_tags:
            d[k] = t.attrib['v']

    return d


def _dict_to_dataframe(d):
    """
    Build a DataFrame from a list of dicts (one row per dict), parsing a
    'timestamp' column into datetimes when one is present.
    """
    df = pd.DataFrame(d)
    if 'timestamp' in df:
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    return df


def _read_members(doc, parent_tag, child_tag):
    """
    Shared reader for ways and relations.

    Returns (members, tags): 'members' has one row per 'child_tag' child
    with the child's attributes, the parent's 'id' and the child's position
    as 'index'; 'tags' has one row per 'parent_tag' element.
    """
    tags = []
    members = []
    for parent in doc.findall(parent_tag):
        parent_id = parent.attrib['id']
        for i, child in enumerate(parent.findall(child_tag)):
            d = child.attrib.copy()
            d['id'] = parent_id
            d['index'] = i
            members.append(d)

        tags.append(_element_to_dict(parent))

    return _dict_to_dataframe(members), _dict_to_dataframe(tags)


def read_ways(doc):
    """
    Read all <way> elements; returns (waynodes, waytags) DataFrames.
    """
    # Example:
    #   <way id="10">
    #     <nd ref="1"/>
    #     <nd ref="2"/>
    #     <tag k="highway" v="residential"/>
    #   </way>
    return _read_members(doc, 'way', 'nd')


def read_relations(doc):
    """
    Read all <relation> elements; returns (relmembers, reltags) DataFrames.
    """
    # Example:
    #   <relation id="20">
    #     <member type="way" ref="10" role="outer"/>
    #     <tag k="type" v="multipolygon"/>
    #   </relation>
    return _read_members(doc, 'relation', 'member')


def render_to_gdf(osmdata, drop_untagged=True):
    """
    Build a single GeoDataFrame of points (nodes) and linestrings (ways)
    from an OSMData namedtuple. Relations are not rendered.

    Parameters
    ----------
    osmdata : OSMData
    drop_untagged : boolean, default True
        Drop nodes that carry no tags (see render_nodes)
    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)

    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is not None:
        # pd.concat instead of DataFrame.append, which newer pandas
        # releases no longer provide.
        nodes = pd.concat([nodes, ways]).set_geometry('geometry', crs=_crs)

    return nodes


def render_nodes(nodes, drop_untagged=True):
    """
    Convert the nodes DataFrame into point geometries.

    The 'lon'/'lat' columns are replaced by a geometry of Points. With
    drop_untagged, rows whose columns other than 'id', 'lon' and 'lat' are
    all missing are removed first.
    """
    # Drop nodes that have no tags, convert lon/lat to points
    if drop_untagged:
        tag_columns = nodes.columns.drop(['id', 'lon', 'lat'])
        nodes = nodes.dropna(subset=tag_columns, how='all')
    points = [Point(lon, lat)
              for lon, lat in zip(nodes['lon'], nodes['lat'])]
    nodes = nodes.drop(['lon', 'lat'], axis=1)
    nodes = nodes.set_geometry(points, crs=_crs)

    return nodes


def render_ways(nodes, waynodes, waytags):
    """
    Build one LineString per way and attach it to the way tags.

    Returns None when there are no way nodes, otherwise the waytags frame
    with a geometry set from the node coordinates of each way.
    """
    if waynodes is None or waynodes.empty:
        return None

    node_points = nodes[['id', 'lon', 'lat']]

    def wayline(df):
        # The merge below does not keep the nodes in way order, so restore
        # it from the position recorded while reading the way.
        df = df.sort_values('index')[['lon', 'lat']]
        return LineString(df.values)

    # Group the ways and create a LineString for each one.  way_lines is a
    # Series where the index is the way id and the value is the LineString.
    # Merge it with the waytags to get a single GeoDataFrame of ways
    waynodes = waynodes.merge(node_points, left_on='ref', right_on='id',
                              suffixes=('', '_nodes'))
    way_lines = waynodes.groupby('id').apply(wayline)
    ways = waytags.set_index('id').set_geometry(way_lines, crs=_crs)
    ways.reset_index(inplace=True)

    return ways


# The rest of this dump is the repository's separate /setup.py. It is kept
# verbatim below as comments: its `from __future__` import has to be the
# first statement of its own file, so it cannot be live code in this module.
#
# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
# from __future__ import unicode_literals
#
# from codecs import open as codecs_open
# from setuptools import setup, find_packages
#
#
# # Get the long description from the relevant file
# with codecs_open('README.rst', encoding='utf-8') as f:
#     long_description = f.read()
#
#
# setup(name='geopandas_osm',
#       version='0.0.1',
#       description="Skeleton of a Python package",
#       long_description=long_description,
#       classifiers=[],
#       keywords='',
#       author="Jacob Wasserman",
#       author_email='jwasserman@gmail.com',
#       url='https://github.com/jwass/geopandas_osm',
#       license='MIT',
#       packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
#       include_package_data=True,
#       zip_safe=False,
#       install_requires=[
#           'six',
#           'geopandas',
#       ],
#       extras_require={
#           'test': ['pytest'],
#       },
#       entry_points="""
#       [console_scripts]
#       """
#       )
# --------------------------------------------------------------------------------