├── .gitignore ├── LICENSE ├── README.md ├── folium └── folium.ipynb ├── folium_legend └── folium_legend.ipynb ├── folium_lines └── folium_arrow_lines.ipynb ├── folium_markerclusters └── folium_markerclusters.ipynb ├── geopandas_sjoin ├── data │ └── gz_2010_us_040_00_500k.json └── geopandas_sjoin.ipynb ├── osmnx_routing └── OSMnx_routing.ipynb └── reverse_geocoding └── reverse_geocoder.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # apple 104 | .DS_Store 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Bob Haffner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## [Spatial Joins in GeoPandas](https://medium.com/@bobhaffner/spatial-joins-in-geopandas-c5e916a763f3) 2 | 3 | [code](https://github.com/bobhaffner/medium_posts/tree/master/geopandas_sjoin) 4 | 5 | 6 | 7 | ## [Folium is an easy to use Python GIS tool](https://medium.com/@bobhaffner/folium-is-an-easy-to-use-python-gis-tool-dbc7eb11fb12) 8 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium) 9 | 10 | 11 | 12 | ## [Creating a legend for a Folium map](https://medium.com/@bobhaffner/creating-a-legend-for-a-folium-map-c1e0ffc34373) 13 | 14 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium_legend) 15 | 16 | 17 | ## [Folium MarkerClusters and FastMarkerClusters](https://medium.com/@bobhaffner/folium-markerclusters-and-fastmarkerclusters-1e03b01cb7b1) 18 | 19 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium_markerclusters) 20 | 21 | ## [Folium lines with arrows](https://medium.com/@bobhaffner/folium-lines-with-arrows-25a0fe88e4e) 22 | 23 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium_lines) 24 | 25 | 26 | ## [Reverse Geocoding](https://medium.com/@bobhaffner/reverse-geocoding-4be0542fdc35) 27 | 28 | [code](https://github.com/bobhaffner/medium_posts/tree/master/reverse_geocoding) 29 | 
-------------------------------------------------------------------------------- /folium/folium.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2018-01-30T03:31:07.597567Z", 9 | "start_time": "2018-01-30T03:31:06.746520Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import pandas as pd\n", 16 | "import folium" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 5, 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2018-01-30T03:33:46.560997Z", 25 | "start_time": "2018-01-30T03:33:45.676872Z" 26 | } 27 | }, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | "
GEOIDALANDAWATERALAND_SQMIAWATER_SQMIINTPTLATINTPTLONG
00060116665988479929364.3480.30918.180555-66.749961
10060279287203444876130.6131.71818.361945-67.175597
2006038188470718389531.6160.07118.455183-67.119887
3006061095799981248742.3090.00518.158345-66.932911
40061093020979417199435.9161.61118.295366-67.125135
\n", 112 | "
" 113 | ], 114 | "text/plain": [ 115 | " GEOID ALAND AWATER ALAND_SQMI AWATER_SQMI INTPTLAT INTPTLONG\n", 116 | "0 00601 166659884 799293 64.348 0.309 18.180555 -66.749961\n", 117 | "1 00602 79287203 4448761 30.613 1.718 18.361945 -67.175597\n", 118 | "2 00603 81884707 183895 31.616 0.071 18.455183 -67.119887\n", 119 | "3 00606 109579998 12487 42.309 0.005 18.158345 -66.932911\n", 120 | "4 00610 93020979 4171994 35.916 1.611 18.295366 -67.125135" 121 | ] 122 | }, 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "file_url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'\n", 130 | "\n", 131 | "#Pandas usually infers zips are numerics, but we lose our leading zeroes so let's go with the object dtype\n", 132 | "df = pd.read_csv(file_url, sep='\\t', dtype={'GEOID' : object}) \n", 133 | "\n", 134 | "df.columns = df.columns.str.strip() #some column names have some extra padding\n", 135 | "\n", 136 | "df.head()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 4, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2018-01-30T03:32:05.812189Z", 145 | "start_time": "2018-01-30T03:32:02.999265Z" 146 | } 147 | }, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/html": [ 152 | "
" 153 | ], 154 | "text/plain": [ 155 | "" 156 | ] 157 | }, 158 | "execution_count": 4, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "#grab a random sample from df\n", 165 | "subset_of_df = df.sample(n=100)\n", 166 | "\n", 167 | "#creating a map that's centered to our sample\n", 168 | "some_map = folium.Map(location=[subset_of_df['INTPTLAT'].mean(), \n", 169 | " subset_of_df['INTPTLONG'].mean()], \n", 170 | " zoom_start=4)\n", 171 | "\n", 172 | "#creating a Marker for each point in df_sample. Each point will get a popup with their zip\n", 173 | "for row in subset_of_df.itertuples():\n", 174 | " some_map.add_child(folium.Marker(location=[row.INTPTLAT, row.INTPTLONG],\n", 175 | " popup=row.GEOID))\n", 176 | "\n", 177 | "some_map" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.5.4" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 2 211 | } 212 | -------------------------------------------------------------------------------- /folium_lines/folium_arrow_lines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-06-12T14:14:37.729153Z", 9 | "start_time": "2019-06-12T14:14:36.988070Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import folium\n", 15 | "import 
numpy as np\n", 16 | "import pandas as pd\n", 17 | "from collections import namedtuple" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2019-06-12T14:14:39.812458Z", 26 | "start_time": "2019-06-12T14:14:39.699864Z" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "def get_bearing(p1, p2):\n", 32 | " \n", 33 | " '''\n", 34 | " Returns compass bearing from p1 to p2\n", 35 | " \n", 36 | " Parameters\n", 37 | " p1 : namedtuple with lat lon\n", 38 | " p2 : namedtuple with lat lon\n", 39 | " \n", 40 | " Return\n", 41 | " compass bearing of type float\n", 42 | " \n", 43 | " Notes\n", 44 | " Based on https://gist.github.com/jeromer/2005586\n", 45 | " '''\n", 46 | " \n", 47 | " long_diff = np.radians(p2.lon - p1.lon)\n", 48 | " \n", 49 | " lat1 = np.radians(p1.lat)\n", 50 | " lat2 = np.radians(p2.lat)\n", 51 | " \n", 52 | " x = np.sin(long_diff) * np.cos(lat2)\n", 53 | " y = (np.cos(lat1) * np.sin(lat2) \n", 54 | " - (np.sin(lat1) * np.cos(lat2) \n", 55 | " * np.cos(long_diff)))\n", 56 | "\n", 57 | " bearing = np.degrees(np.arctan2(x, y))\n", 58 | " \n", 59 | " # adjusting for compass bearing\n", 60 | " if bearing < 0:\n", 61 | " return bearing + 360\n", 62 | " return bearing\n", 63 | "\n", 64 | "\n", 65 | "def get_arrows(locations, color='blue', size=6, n_arrows=3):\n", 66 | " \n", 67 | " '''\n", 68 | " Get a list of correctly placed and rotated \n", 69 | " arrows/markers to be plotted\n", 70 | " \n", 71 | " Parameters\n", 72 | " locations : list of lists of lat lons that represent the \n", 73 | " start and end of the line. \n", 74 | " eg [[41.1132, -96.1993],[41.3810, -95.8021]]\n", 75 | " color : whatever folium can use. default is 'blue'\n", 76 | " size : default is 6\n", 77 | " n_arrows : number of arrows to create. 
default is 3\n", 78 | "\n", 79 | " Return\n", 80 | " list of arrows/markers\n", 81 | " '''\n", 82 | " \n", 83 | " Point = namedtuple('Point', field_names=['lat', 'lon'])\n", 84 | " \n", 85 | " # creating point from our Point named tuple\n", 86 | " p1 = Point(locations[0][0], locations[0][1])\n", 87 | " p2 = Point(locations[1][0], locations[1][1])\n", 88 | " \n", 89 | " # getting the rotation needed for our marker. \n", 90 | " # Subtracting 90 to account for the marker's orientation\n", 91 | " # of due East(get_bearing returns North)\n", 92 | " rotation = get_bearing(p1, p2) - 90\n", 93 | " \n", 94 | " # get an evenly space list of lats and lons for our arrows\n", 95 | " # note that I'm discarding the first and last for aesthetics\n", 96 | " # as I'm using markers to denote the start and end\n", 97 | " arrow_lats = np.linspace(p1.lat, p2.lat, n_arrows + 2)[1:n_arrows+1]\n", 98 | " arrow_lons = np.linspace(p1.lon, p2.lon, n_arrows + 2)[1:n_arrows+1]\n", 99 | " \n", 100 | " arrows = []\n", 101 | " \n", 102 | " #creating each \"arrow\" and appending them to our arrows list\n", 103 | " for points in zip(arrow_lats, arrow_lons):\n", 104 | " arrows.append(folium.RegularPolygonMarker(location=points, \n", 105 | " fill_color=color, number_of_sides=3, \n", 106 | " radius=size, rotation=rotation))\n", 107 | " return arrows" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 7, 113 | "metadata": { 114 | "ExecuteTime": { 115 | "end_time": "2019-06-12T14:14:48.654868Z", 116 | "start_time": "2019-06-12T14:14:48.645346Z" 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# using omaha coordinates \n", 122 | "center_lat = 41.257160\n", 123 | "center_lon = -95.995102\n", 124 | "\n", 125 | "# generating a couple of random latlongs in the omaha area\n", 126 | "lats = np.random.uniform(low=center_lat - .25, high=center_lat + .25, size=(2,))\n", 127 | "lons = np.random.uniform(low=center_lon - .25, high=center_lon + .25, size=(2,))\n", 128 | "\n", 
129 | "p1 = [lats[0], lons[0]]\n", 130 | "p2 = [lats[1], lons[1]]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": { 137 | "ExecuteTime": { 138 | "end_time": "2019-06-12T14:14:49.160587Z", 139 | "start_time": "2019-06-12T14:14:49.126357Z" 140 | } 141 | }, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/html": [ 146 | "
" 147 | ], 148 | "text/plain": [ 149 | "" 150 | ] 151 | }, 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "some_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)\n", 159 | "\n", 160 | "folium.Marker(location=p1, icon=folium.Icon(color='green')).add_to(some_map)\n", 161 | "folium.Marker(location=p2, icon=folium.Icon(color='red')).add_to(some_map)\n", 162 | "\n", 163 | "folium.PolyLine(locations=[p1, p2], color='blue').add_to(some_map)\n", 164 | "arrows = get_arrows(locations=[p1, p2], color='#FFFFFF', n_arrows=3)\n", 165 | "\n", 166 | "for arrow in arrows:\n", 167 | " arrow.add_to(some_map)\n", 168 | "\n", 169 | "some_map" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python [default]", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.5.4" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /geopandas_sjoin/geopandas_sjoin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-18T13:17:47.293425Z", 9 | "start_time": "2017-09-18T13:17:46.276981Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import pandas as pd\n", 16 | "import geopandas as gpd\n", 17 | 
"import requests\n", 18 | "import os\n", 19 | "from shapely.geometry import Point" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "ExecuteTime": { 27 | "end_time": "2017-09-18T13:17:50.175674Z", 28 | "start_time": "2017-09-18T13:17:48.811340Z" 29 | } 30 | }, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "33144\n", 37 | " GEOID INTPTLAT INTPTLONG\n", 38 | "0 00601 18.180555 -66.749961\n", 39 | "1 00602 18.361945 -67.175597\n", 40 | "2 00603 18.455183 -67.119887\n", 41 | "3 00606 18.158345 -66.932911\n", 42 | "4 00610 18.295366 -67.125135\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'\n", 48 | "zips = pd.read_csv(url, dtype={'GEOID' : 'str'},sep='\\t', usecols=[0,5,6])\n", 49 | "zips.columns = zips.columns.str.strip() #some column cleanup\n", 50 | "print (len(zips))\n", 51 | "print (zips.head())" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": { 58 | "ExecuteTime": { 59 | "end_time": "2017-09-18T13:18:11.296235Z", 60 | "start_time": "2017-09-18T13:18:10.005132Z" 61 | } 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | " GEOID INTPTLAT INTPTLONG geometry\n", 69 | "0 00601 18.180555 -66.749961 POINT (-66.749961 18.180555)\n", 70 | "1 00602 18.361945 -67.175597 POINT (-67.175597 18.361945)\n", 71 | "2 00603 18.455183 -67.119887 POINT (-67.11988700000001 18.455183)\n", 72 | "3 00606 18.158345 -66.932911 POINT (-66.932911 18.158345)\n", 73 | "4 00610 18.295366 -67.125135 POINT (-67.125135 18.295366)\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "geom = zips.apply(lambda x : Point([x['INTPTLONG'],x['INTPTLAT']]), axis=1)\n", 79 | "zips = gpd.GeoDataFrame(zips, geometry=geom)\n", 80 | "zips.crs = {'init' :'epsg:4326'}\n", 81 | "print (zips.head())" 82 | ] 83 | }, 84 
| { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": { 88 | "ExecuteTime": { 89 | "end_time": "2017-09-18T13:18:28.593177Z", 90 | "start_time": "2017-09-18T13:18:28.042696Z" 91 | } 92 | }, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "52\n", 99 | " NAME geometry\n", 100 | "0 Maine (POLYGON ((-67.619761 44.519754, -67.61541 44....\n", 101 | "1 Massachusetts (POLYGON ((-70.832044 41.606504, -70.823735 41...\n", 102 | "2 Michigan (POLYGON ((-88.684434 48.115785, -88.675628 48...\n", 103 | "3 Montana POLYGON ((-104.057698 44.997431, -104.250145 4...\n", 104 | "4 Nevada POLYGON ((-114.0506 37.000396, -114.049995 36....\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "geojson_file = 'data/gz_2010_us_040_00_500k.json'\n", 110 | "\n", 111 | "if not os.path.exists(geojson_file):\n", 112 | " url = 'http://eric.clst.org/wupl/Stuff/gz_2010_us_040_00_500k.json'\n", 113 | " with open(geojson_file, 'w') as f:\n", 114 | " f.write(requests.get(url).text)\n", 115 | "\n", 116 | "states = gpd.read_file(geojson_file)[['NAME', 'geometry']]\n", 117 | "print (len(states))\n", 118 | "print (states.head())" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 6, 124 | "metadata": { 125 | "ExecuteTime": { 126 | "end_time": "2017-09-18T13:18:33.216678Z", 127 | "start_time": "2017-09-18T13:18:31.858911Z" 128 | } 129 | }, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | " GEOID NAME geometry index_right\n", 136 | "0 00601 Puerto Rico POINT (-66.749961 18.180555) 16\n", 137 | "1 00602 Puerto Rico POINT (-67.175597 18.361945) 16\n", 138 | "2 00603 Puerto Rico POINT (-67.11988700000001 18.455183) 16\n", 139 | "3 00606 Puerto Rico POINT (-66.932911 18.158345) 16\n", 140 | "4 00610 Puerto Rico POINT (-67.125135 18.295366) 16\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "zips_and_states = gpd.sjoin(zips, states, op='within')\n", 146 | 
"print (zips_and_states[['GEOID', 'NAME', 'geometry','index_right']].head())" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.5.4" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /reverse_geocoding/reverse_geocoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2018-01-31T03:44:20.480521Z", 9 | "start_time": "2018-01-31T03:44:20.359909Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np\n", 16 | "\n", 17 | "omaha_point = np.array((-95.995102, 41.257160)) " 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Pandas and scikit-learn's KDTree" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "ExecuteTime": { 32 | "end_time": "2018-01-31T03:44:22.265595Z", 33 | "start_time": "2018-01-31T03:44:21.077820Z" 34 | }, 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import pandas as pd\n", 40 | "from sklearn.neighbors import KDTree" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 5, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": 
"2018-01-31T03:44:34.634016Z", 49 | "start_time": "2018-01-31T03:44:33.870474Z" 50 | } 51 | }, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "33144\n" 58 | ] 59 | }, 60 | { 61 | "data": { 62 | "text/html": [ 63 | "
\n", 64 | "\n", 77 | "\n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | "
GEOIDINTPTLATINTPTLONG
00060118.180555-66.749961
10060218.361945-67.175597
20060318.455183-67.119887
30060618.158345-66.932911
40061018.295366-67.125135
\n", 119 | "
" 120 | ], 121 | "text/plain": [ 122 | " GEOID INTPTLAT INTPTLONG\n", 123 | "0 00601 18.180555 -66.749961\n", 124 | "1 00602 18.361945 -67.175597\n", 125 | "2 00603 18.455183 -67.119887\n", 126 | "3 00606 18.158345 -66.932911\n", 127 | "4 00610 18.295366 -67.125135" 128 | ] 129 | }, 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'\n", 137 | "df_locations = pd.read_csv(url, dtype={'GEOID' : 'str'},sep='\\t', usecols=[0,5,6])\n", 138 | "df_locations.columns = df_locations.columns.str.strip() #some column cleanup\n", 139 | "print (len(df_locations))\n", 140 | "df_locations.head()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": { 147 | "ExecuteTime": { 148 | "end_time": "2018-01-31T03:44:36.964944Z", 149 | "start_time": "2018-01-31T03:44:36.933236Z" 150 | }, 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "kdt = KDTree(df_locations[['INTPTLONG', 'INTPTLAT']])" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 7, 161 | "metadata": { 162 | "ExecuteTime": { 163 | "end_time": "2018-01-31T03:44:37.752155Z", 164 | "start_time": "2018-01-31T03:44:37.742238Z" 165 | } 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "23609 68132\n", 173 | "Name: GEOID, dtype: object\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "omaha_point_kdt = np.expand_dims(omaha_point, axis=0)\n", 179 | "\n", 180 | "nearest_point_index = kdt.query(omaha_point_kdt, k=1, return_distance=False)\n", 181 | "print(df_locations.loc[nearest_point_index[0], 'GEOID'])" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## GeoPandas and Shapely" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | 
"execution_count": 8, 194 | "metadata": { 195 | "ExecuteTime": { 196 | "end_time": "2018-01-31T03:44:42.265888Z", 197 | "start_time": "2018-01-31T03:44:42.053058Z" 198 | }, 199 | "collapsed": true 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "import geopandas as gpd\n", 204 | "from shapely.geometry import Point" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 10, 210 | "metadata": { 211 | "ExecuteTime": { 212 | "end_time": "2018-01-31T03:54:07.124201Z", 213 | "start_time": "2018-01-31T03:53:50.699288Z" 214 | } 215 | }, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "33144\n" 222 | ] 223 | }, 224 | { 225 | "data": { 226 | "text/html": [ 227 | "
\n", 228 | "\n", 241 | "\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | "
GEOID10geometry
043451POLYGON ((-83.674464 41.331119, -83.6744449999...
143452POLYGON ((-83.067745 41.537718, -83.067729 41....
243456(POLYGON ((-82.8566 41.681222, -82.856831 41.6...
343457POLYGON ((-83.467474 41.268186, -83.4676039999...
443458POLYGON ((-83.222292 41.531025, -83.2222819999...
\n", 277 | "
" 278 | ], 279 | "text/plain": [ 280 | " GEOID10 geometry\n", 281 | "0 43451 POLYGON ((-83.674464 41.331119, -83.6744449999...\n", 282 | "1 43452 POLYGON ((-83.067745 41.537718, -83.067729 41....\n", 283 | "2 43456 (POLYGON ((-82.8566 41.681222, -82.856831 41.6...\n", 284 | "3 43457 POLYGON ((-83.467474 41.268186, -83.4676039999...\n", 285 | "4 43458 POLYGON ((-83.222292 41.531025, -83.2222819999..." 286 | ] 287 | }, 288 | "execution_count": 10, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "#ftp://ftp2.census.gov/geo/tiger/TIGER2016/ZCTA5/tl_2016_us_zcta510.zip\n", 295 | "gdf_locations = gpd.read_file('data/tl_2016_us_zcta510/tl_2016_us_zcta510.shp')\n", 296 | "print(len(gdf_locations))\n", 297 | "gdf_locations[['GEOID10', 'geometry']].head()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 11, 303 | "metadata": { 304 | "ExecuteTime": { 305 | "end_time": "2018-01-31T03:54:09.626623Z", 306 | "start_time": "2018-01-31T03:54:09.010534Z" 307 | } 308 | }, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "24842 68132\n", 315 | "Name: GEOID10, dtype: object\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "omaha_point_shp = Point(omaha_point)\n", 321 | "\n", 322 | "filter = gdf_locations['geometry'].contains(omaha_point_shp)\n", 323 | "print(gdf_locations.loc[filter, 'GEOID10'])" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "## Timing" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 12, 336 | "metadata": { 337 | "ExecuteTime": { 338 | "end_time": "2018-01-31T03:54:15.041312Z", 339 | "start_time": "2018-01-31T03:54:10.481233Z" 340 | } 341 | }, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "563 µs ± 27.2 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "%%timeit\n", 353 | "nearest_point_index = kdt.query(omaha_point_kdt, k=1, return_distance=False)\n", 354 | "df_locations.loc[nearest_point_index[0], 'GEOID']" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 13, 360 | "metadata": { 361 | "ExecuteTime": { 362 | "end_time": "2018-01-31T03:54:17.247895Z", 363 | "start_time": "2018-01-31T03:54:15.043292Z" 364 | } 365 | }, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "276 ms ± 17.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 372 | ] 373 | } 374 | ], 375 | "source": [ 376 | "%%timeit\n", 377 | "filter = gdf_locations['geometry'].contains(omaha_point_shp)\n", 378 | "gdf_locations.loc[filter, 'GEOID10']" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [] 389 | } 390 | ], 391 | "metadata": { 392 | "kernelspec": { 393 | "display_name": "Python 3", 394 | "language": "python", 395 | "name": "python3" 396 | }, 397 | "language_info": { 398 | "codemirror_mode": { 399 | "name": "ipython", 400 | "version": 3 401 | }, 402 | "file_extension": ".py", 403 | "mimetype": "text/x-python", 404 | "name": "python", 405 | "nbconvert_exporter": "python", 406 | "pygments_lexer": "ipython3", 407 | "version": "3.5.4" 408 | } 409 | }, 410 | "nbformat": 4, 411 | "nbformat_minor": 2 412 | } 413 | --------------------------------------------------------------------------------