├── .gitignore
├── LICENSE
├── README.md
├── folium
│   └── folium.ipynb
├── folium_legend
│   └── folium_legend.ipynb
├── folium_lines
│   └── folium_arrow_lines.ipynb
├── folium_markerclusters
│   └── folium_markerclusters.ipynb
├── geopandas_sjoin
│   ├── data
│   │   └── gz_2010_us_040_00_500k.json
│   └── geopandas_sjoin.ipynb
├── osmnx_routing
│   └── OSMnx_routing.ipynb
└── reverse_geocoding
    └── reverse_geocoder.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # apple
104 | .DS_Store
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Bob Haffner
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## [Spatial Joins in GeoPandas](https://medium.com/@bobhaffner/spatial-joins-in-geopandas-c5e916a763f3)
2 |
3 | [code](https://github.com/bobhaffner/medium_posts/tree/master/geopandas_sjoin)
4 |
5 |
6 |
7 | ## [Folium is an easy to use Python GIS tool](https://medium.com/@bobhaffner/folium-is-an-easy-to-use-python-gis-tool-dbc7eb11fb12)
8 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium)
9 |
10 |
11 |
12 | ## [Creating a legend for a Folium map](https://medium.com/@bobhaffner/creating-a-legend-for-a-folium-map-c1e0ffc34373)
13 |
14 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium_legend)
15 |
16 |
17 | ## [Folium MarkerClusters and FastMarkerClusters](https://medium.com/@bobhaffner/folium-markerclusters-and-fastmarkerclusters-1e03b01cb7b1)
18 |
19 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium_markerclusters)
20 |
21 | ## [Folium lines with arrows](https://medium.com/@bobhaffner/folium-lines-with-arrows-25a0fe88e4e)
22 |
23 | [code](https://github.com/bobhaffner/medium_posts/tree/master/folium_lines)
24 |
25 |
26 | ## [Reverse Geocoding](https://medium.com/@bobhaffner/reverse-geocoding-4be0542fdc35)
27 |
28 | [code](https://github.com/bobhaffner/medium_posts/tree/master/reverse_geocoding)
29 |
--------------------------------------------------------------------------------
/folium/folium.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2018-01-30T03:31:07.597567Z",
9 | "start_time": "2018-01-30T03:31:06.746520Z"
10 | },
11 | "collapsed": true
12 | },
13 | "outputs": [],
14 | "source": [
15 | "import pandas as pd\n",
16 | "import folium"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 5,
22 | "metadata": {
23 | "ExecuteTime": {
24 | "end_time": "2018-01-30T03:33:46.560997Z",
25 | "start_time": "2018-01-30T03:33:45.676872Z"
26 | }
27 | },
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/html": [
32 | "
\n",
33 | "\n",
46 | "
\n",
47 | " \n",
48 | " \n",
49 | " | \n",
50 | " GEOID | \n",
51 | " ALAND | \n",
52 | " AWATER | \n",
53 | " ALAND_SQMI | \n",
54 | " AWATER_SQMI | \n",
55 | " INTPTLAT | \n",
56 | " INTPTLONG | \n",
57 | "
\n",
58 | " \n",
59 | " \n",
60 | " \n",
61 | " 0 | \n",
62 | " 00601 | \n",
63 | " 166659884 | \n",
64 | " 799293 | \n",
65 | " 64.348 | \n",
66 | " 0.309 | \n",
67 | " 18.180555 | \n",
68 | " -66.749961 | \n",
69 | "
\n",
70 | " \n",
71 | " 1 | \n",
72 | " 00602 | \n",
73 | " 79287203 | \n",
74 | " 4448761 | \n",
75 | " 30.613 | \n",
76 | " 1.718 | \n",
77 | " 18.361945 | \n",
78 | " -67.175597 | \n",
79 | "
\n",
80 | " \n",
81 | " 2 | \n",
82 | " 00603 | \n",
83 | " 81884707 | \n",
84 | " 183895 | \n",
85 | " 31.616 | \n",
86 | " 0.071 | \n",
87 | " 18.455183 | \n",
88 | " -67.119887 | \n",
89 | "
\n",
90 | " \n",
91 | " 3 | \n",
92 | " 00606 | \n",
93 | " 109579998 | \n",
94 | " 12487 | \n",
95 | " 42.309 | \n",
96 | " 0.005 | \n",
97 | " 18.158345 | \n",
98 | " -66.932911 | \n",
99 | "
\n",
100 | " \n",
101 | " 4 | \n",
102 | " 00610 | \n",
103 | " 93020979 | \n",
104 | " 4171994 | \n",
105 | " 35.916 | \n",
106 | " 1.611 | \n",
107 | " 18.295366 | \n",
108 | " -67.125135 | \n",
109 | "
\n",
110 | " \n",
111 | "
\n",
112 | "
"
113 | ],
114 | "text/plain": [
115 | " GEOID ALAND AWATER ALAND_SQMI AWATER_SQMI INTPTLAT INTPTLONG\n",
116 | "0 00601 166659884 799293 64.348 0.309 18.180555 -66.749961\n",
117 | "1 00602 79287203 4448761 30.613 1.718 18.361945 -67.175597\n",
118 | "2 00603 81884707 183895 31.616 0.071 18.455183 -67.119887\n",
119 | "3 00606 109579998 12487 42.309 0.005 18.158345 -66.932911\n",
120 | "4 00610 93020979 4171994 35.916 1.611 18.295366 -67.125135"
121 | ]
122 | },
123 | "execution_count": 5,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "file_url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'\n",
130 | "\n",
131 | "#Pandas usually infers zips are numerics, but we lose our leading zeroes so let's go with the object dtype\n",
132 | "df = pd.read_csv(file_url, sep='\\t', dtype={'GEOID' : object}) \n",
133 | "\n",
134 | "df.columns = df.columns.str.strip() #some column names have some extra padding\n",
135 | "\n",
136 | "df.head()"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 4,
142 | "metadata": {
143 | "ExecuteTime": {
144 | "end_time": "2018-01-30T03:32:05.812189Z",
145 | "start_time": "2018-01-30T03:32:02.999265Z"
146 | }
147 | },
148 | "outputs": [
149 | {
150 | "data": {
151 | "text/html": [
152 | ""
153 | ],
154 | "text/plain": [
155 | ""
156 | ]
157 | },
158 | "execution_count": 4,
159 | "metadata": {},
160 | "output_type": "execute_result"
161 | }
162 | ],
163 | "source": [
164 | "#grab a random sample from df\n",
165 | "subset_of_df = df.sample(n=100)\n",
166 | "\n",
167 | "#creating a map that's centered to our sample\n",
168 | "some_map = folium.Map(location=[subset_of_df['INTPTLAT'].mean(), \n",
169 | " subset_of_df['INTPTLONG'].mean()], \n",
170 | " zoom_start=4)\n",
171 | "\n",
172 | "#creating a Marker for each point in df_sample. Each point will get a popup with their zip\n",
173 | "for row in subset_of_df.itertuples():\n",
174 | " some_map.add_child(folium.Marker(location=[row.INTPTLAT, row.INTPTLONG],\n",
175 | " popup=row.GEOID))\n",
176 | "\n",
177 | "some_map"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {
184 | "collapsed": true
185 | },
186 | "outputs": [],
187 | "source": []
188 | }
189 | ],
190 | "metadata": {
191 | "kernelspec": {
192 | "display_name": "Python 3",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.5.4"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 2
211 | }
212 |
--------------------------------------------------------------------------------
/folium_lines/folium_arrow_lines.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2019-06-12T14:14:37.729153Z",
9 | "start_time": "2019-06-12T14:14:36.988070Z"
10 | }
11 | },
12 | "outputs": [],
13 | "source": [
14 | "import folium\n",
15 | "import numpy as np\n",
16 | "import pandas as pd\n",
17 | "from collections import namedtuple"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {
24 | "ExecuteTime": {
25 | "end_time": "2019-06-12T14:14:39.812458Z",
26 | "start_time": "2019-06-12T14:14:39.699864Z"
27 | }
28 | },
29 | "outputs": [],
30 | "source": [
31 | "def get_bearing(p1, p2):\n",
32 | " \n",
33 | " '''\n",
34 | " Returns compass bearing from p1 to p2\n",
35 | " \n",
36 | " Parameters\n",
37 | " p1 : namedtuple with lat lon\n",
38 | " p2 : namedtuple with lat lon\n",
39 | " \n",
40 | " Return\n",
41 | " compass bearing of type float\n",
42 | " \n",
43 | " Notes\n",
44 | " Based on https://gist.github.com/jeromer/2005586\n",
45 | " '''\n",
46 | " \n",
47 | " long_diff = np.radians(p2.lon - p1.lon)\n",
48 | " \n",
49 | " lat1 = np.radians(p1.lat)\n",
50 | " lat2 = np.radians(p2.lat)\n",
51 | " \n",
52 | " x = np.sin(long_diff) * np.cos(lat2)\n",
53 | " y = (np.cos(lat1) * np.sin(lat2) \n",
54 | " - (np.sin(lat1) * np.cos(lat2) \n",
55 | " * np.cos(long_diff)))\n",
56 | "\n",
57 | " bearing = np.degrees(np.arctan2(x, y))\n",
58 | " \n",
59 | " # adjusting for compass bearing\n",
60 | " if bearing < 0:\n",
61 | " return bearing + 360\n",
62 | " return bearing\n",
63 | "\n",
64 | "\n",
65 | "def get_arrows(locations, color='blue', size=6, n_arrows=3):\n",
66 | " \n",
67 | " '''\n",
68 | " Get a list of correctly placed and rotated \n",
69 | " arrows/markers to be plotted\n",
70 | " \n",
71 | " Parameters\n",
72 | " locations : list of lists of lat lons that represent the \n",
73 | " start and end of the line. \n",
74 | " eg [[41.1132, -96.1993],[41.3810, -95.8021]]\n",
75 | " arrow_color : whatever folium can use. default is 'blue'\n",
76 | " size : default is 6\n",
77 | " n_arrows : number of arrows to create. default is 3\n",
78 | "\n",
79 | " Return\n",
80 | " list of arrows/markers\n",
81 | " '''\n",
82 | " \n",
83 | " Point = namedtuple('Point', field_names=['lat', 'lon'])\n",
84 | " \n",
85 | " # creating point from our Point named tuple\n",
86 | " p1 = Point(locations[0][0], locations[0][1])\n",
87 | " p2 = Point(locations[1][0], locations[1][1])\n",
88 | " \n",
89 | " # getting the rotation needed for our marker. \n",
90 | " # Subtracting 90 to account for the marker's orientation\n",
91 | " # of due East(get_bearing returns North)\n",
92 | " rotation = get_bearing(p1, p2) - 90\n",
93 | " \n",
94 | " # get an evenly space list of lats and lons for our arrows\n",
95 | " # note that I'm discarding the first and last for aesthetics\n",
96 | " # as I'm using markers to denote the start and end\n",
97 | " arrow_lats = np.linspace(p1.lat, p2.lat, n_arrows + 2)[1:n_arrows+1]\n",
98 | " arrow_lons = np.linspace(p1.lon, p2.lon, n_arrows + 2)[1:n_arrows+1]\n",
99 | " \n",
100 | " arrows = []\n",
101 | " \n",
102 | " #creating each \"arrow\" and appending them to our arrows list\n",
103 | " for points in zip(arrow_lats, arrow_lons):\n",
104 | " arrows.append(folium.RegularPolygonMarker(location=points, \n",
105 | " fill_color=color, number_of_sides=3, \n",
106 | " radius=size, rotation=rotation))\n",
107 | " return arrows"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 7,
113 | "metadata": {
114 | "ExecuteTime": {
115 | "end_time": "2019-06-12T14:14:48.654868Z",
116 | "start_time": "2019-06-12T14:14:48.645346Z"
117 | }
118 | },
119 | "outputs": [],
120 | "source": [
121 | "# using omaha coordinates \n",
122 | "center_lat = 41.257160\n",
123 | "center_lon = -95.995102\n",
124 | "\n",
125 | "# generating a couple of random latlongs in the omaha area\n",
126 | "lats = np.random.uniform(low=center_lat - .25, high=center_lat + .25, size=(2,))\n",
127 | "lons = np.random.uniform(low=center_lon - .25, high=center_lon + .25, size=(2,))\n",
128 | "\n",
129 | "p1 = [lats[0], lons[0]]\n",
130 | "p2 = [lats[1], lons[1]]"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 8,
136 | "metadata": {
137 | "ExecuteTime": {
138 | "end_time": "2019-06-12T14:14:49.160587Z",
139 | "start_time": "2019-06-12T14:14:49.126357Z"
140 | }
141 | },
142 | "outputs": [
143 | {
144 | "data": {
145 | "text/html": [
146 | ""
147 | ],
148 | "text/plain": [
149 | ""
150 | ]
151 | },
152 | "execution_count": 8,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "some_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)\n",
159 | "\n",
160 | "folium.Marker(location=p1, icon=folium.Icon(color='green')).add_to(some_map)\n",
161 | "folium.Marker(location=p2, icon=folium.Icon(color='red')).add_to(some_map)\n",
162 | "\n",
163 | "folium.PolyLine(locations=[p1, p2], color='blue').add_to(some_map)\n",
164 | "arrows = get_arrows(locations=[p1, p2], color='#FFFFFF', n_arrows=3)\n",
165 | "\n",
166 | "for arrow in arrows:\n",
167 | " arrow.add_to(some_map)\n",
168 | "\n",
169 | "some_map"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "metadata": {
176 | "collapsed": true
177 | },
178 | "outputs": [],
179 | "source": []
180 | }
181 | ],
182 | "metadata": {
183 | "kernelspec": {
184 | "display_name": "Python [default]",
185 | "language": "python",
186 | "name": "python3"
187 | },
188 | "language_info": {
189 | "codemirror_mode": {
190 | "name": "ipython",
191 | "version": 3
192 | },
193 | "file_extension": ".py",
194 | "mimetype": "text/x-python",
195 | "name": "python",
196 | "nbconvert_exporter": "python",
197 | "pygments_lexer": "ipython3",
198 | "version": "3.5.4"
199 | }
200 | },
201 | "nbformat": 4,
202 | "nbformat_minor": 2
203 | }
204 |
--------------------------------------------------------------------------------
/geopandas_sjoin/geopandas_sjoin.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2017-09-18T13:17:47.293425Z",
9 | "start_time": "2017-09-18T13:17:46.276981Z"
10 | },
11 | "collapsed": true
12 | },
13 | "outputs": [],
14 | "source": [
15 | "import pandas as pd\n",
16 | "import geopandas as gpd\n",
17 | "import requests\n",
18 | "import os\n",
19 | "from shapely.geometry import Point"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {
26 | "ExecuteTime": {
27 | "end_time": "2017-09-18T13:17:50.175674Z",
28 | "start_time": "2017-09-18T13:17:48.811340Z"
29 | }
30 | },
31 | "outputs": [
32 | {
33 | "name": "stdout",
34 | "output_type": "stream",
35 | "text": [
36 | "33144\n",
37 | " GEOID INTPTLAT INTPTLONG\n",
38 | "0 00601 18.180555 -66.749961\n",
39 | "1 00602 18.361945 -67.175597\n",
40 | "2 00603 18.455183 -67.119887\n",
41 | "3 00606 18.158345 -66.932911\n",
42 | "4 00610 18.295366 -67.125135\n"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'\n",
48 | "zips = pd.read_csv(url, dtype={'GEOID' : 'str'},sep='\\t', usecols=[0,5,6])\n",
49 | "zips.columns = zips.columns.str.strip() #some column cleanup\n",
50 | "print (len(zips))\n",
51 | "print (zips.head())"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {
58 | "ExecuteTime": {
59 | "end_time": "2017-09-18T13:18:11.296235Z",
60 | "start_time": "2017-09-18T13:18:10.005132Z"
61 | }
62 | },
63 | "outputs": [
64 | {
65 | "name": "stdout",
66 | "output_type": "stream",
67 | "text": [
68 | " GEOID INTPTLAT INTPTLONG geometry\n",
69 | "0 00601 18.180555 -66.749961 POINT (-66.749961 18.180555)\n",
70 | "1 00602 18.361945 -67.175597 POINT (-67.175597 18.361945)\n",
71 | "2 00603 18.455183 -67.119887 POINT (-67.11988700000001 18.455183)\n",
72 | "3 00606 18.158345 -66.932911 POINT (-66.932911 18.158345)\n",
73 | "4 00610 18.295366 -67.125135 POINT (-67.125135 18.295366)\n"
74 | ]
75 | }
76 | ],
77 | "source": [
78 | "geom = zips.apply(lambda x : Point([x['INTPTLONG'],x['INTPTLAT']]), axis=1)\n",
79 | "zips = gpd.GeoDataFrame(zips, geometry=geom)\n",
80 | "zips.crs = {'init' :'epsg:4326'}\n",
81 | "print (zips.head())"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 5,
87 | "metadata": {
88 | "ExecuteTime": {
89 | "end_time": "2017-09-18T13:18:28.593177Z",
90 | "start_time": "2017-09-18T13:18:28.042696Z"
91 | }
92 | },
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "52\n",
99 | " NAME geometry\n",
100 | "0 Maine (POLYGON ((-67.619761 44.519754, -67.61541 44....\n",
101 | "1 Massachusetts (POLYGON ((-70.832044 41.606504, -70.823735 41...\n",
102 | "2 Michigan (POLYGON ((-88.684434 48.115785, -88.675628 48...\n",
103 | "3 Montana POLYGON ((-104.057698 44.997431, -104.250145 4...\n",
104 | "4 Nevada POLYGON ((-114.0506 37.000396, -114.049995 36....\n"
105 | ]
106 | }
107 | ],
108 | "source": [
109 | "geojson_file = 'data/gz_2010_us_040_00_500k.json'\n",
110 | "\n",
111 | "if not os.path.exists(geojson_file):\n",
112 | " url = 'http://eric.clst.org/wupl/Stuff/gz_2010_us_040_00_500k.json'\n",
113 | " with open(geojson_file, 'w') as f:\n",
114 | " f.write(requests.get(url).text)\n",
115 | "\n",
116 | "states = gpd.read_file(geojson_file)[['NAME', 'geometry']]\n",
117 | "print (len(states))\n",
118 | "print (states.head())"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 6,
124 | "metadata": {
125 | "ExecuteTime": {
126 | "end_time": "2017-09-18T13:18:33.216678Z",
127 | "start_time": "2017-09-18T13:18:31.858911Z"
128 | }
129 | },
130 | "outputs": [
131 | {
132 | "name": "stdout",
133 | "output_type": "stream",
134 | "text": [
135 | " GEOID NAME geometry index_right\n",
136 | "0 00601 Puerto Rico POINT (-66.749961 18.180555) 16\n",
137 | "1 00602 Puerto Rico POINT (-67.175597 18.361945) 16\n",
138 | "2 00603 Puerto Rico POINT (-67.11988700000001 18.455183) 16\n",
139 | "3 00606 Puerto Rico POINT (-66.932911 18.158345) 16\n",
140 | "4 00610 Puerto Rico POINT (-67.125135 18.295366) 16\n"
141 | ]
142 | }
143 | ],
144 | "source": [
145 | "zips_and_states = gpd.sjoin(zips, states, op='within')\n",
146 | "print (zips_and_states[['GEOID', 'NAME', 'geometry','index_right']].head())"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {
153 | "collapsed": true
154 | },
155 | "outputs": [],
156 | "source": []
157 | }
158 | ],
159 | "metadata": {
160 | "kernelspec": {
161 | "display_name": "Python 3",
162 | "language": "python",
163 | "name": "python3"
164 | },
165 | "language_info": {
166 | "codemirror_mode": {
167 | "name": "ipython",
168 | "version": 3
169 | },
170 | "file_extension": ".py",
171 | "mimetype": "text/x-python",
172 | "name": "python",
173 | "nbconvert_exporter": "python",
174 | "pygments_lexer": "ipython3",
175 | "version": "3.5.4"
176 | }
177 | },
178 | "nbformat": 4,
179 | "nbformat_minor": 2
180 | }
181 |
--------------------------------------------------------------------------------
/reverse_geocoding/reverse_geocoder.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2018-01-31T03:44:20.480521Z",
9 | "start_time": "2018-01-31T03:44:20.359909Z"
10 | },
11 | "collapsed": true
12 | },
13 | "outputs": [],
14 | "source": [
15 | "import numpy as np\n",
16 | "\n",
17 | "omaha_point = np.array((-95.995102, 41.257160)) "
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Pandas and scikit-learn's KDTree"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {
31 | "ExecuteTime": {
32 | "end_time": "2018-01-31T03:44:22.265595Z",
33 | "start_time": "2018-01-31T03:44:21.077820Z"
34 | },
35 | "collapsed": true
36 | },
37 | "outputs": [],
38 | "source": [
39 | "import pandas as pd\n",
40 | "from sklearn.neighbors import KDTree"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 5,
46 | "metadata": {
47 | "ExecuteTime": {
48 | "end_time": "2018-01-31T03:44:34.634016Z",
49 | "start_time": "2018-01-31T03:44:33.870474Z"
50 | }
51 | },
52 | "outputs": [
53 | {
54 | "name": "stdout",
55 | "output_type": "stream",
56 | "text": [
57 | "33144\n"
58 | ]
59 | },
60 | {
61 | "data": {
62 | "text/html": [
63 | "\n",
64 | "\n",
77 | "
\n",
78 | " \n",
79 | " \n",
80 | " | \n",
81 | " GEOID | \n",
82 | " INTPTLAT | \n",
83 | " INTPTLONG | \n",
84 | "
\n",
85 | " \n",
86 | " \n",
87 | " \n",
88 | " 0 | \n",
89 | " 00601 | \n",
90 | " 18.180555 | \n",
91 | " -66.749961 | \n",
92 | "
\n",
93 | " \n",
94 | " 1 | \n",
95 | " 00602 | \n",
96 | " 18.361945 | \n",
97 | " -67.175597 | \n",
98 | "
\n",
99 | " \n",
100 | " 2 | \n",
101 | " 00603 | \n",
102 | " 18.455183 | \n",
103 | " -67.119887 | \n",
104 | "
\n",
105 | " \n",
106 | " 3 | \n",
107 | " 00606 | \n",
108 | " 18.158345 | \n",
109 | " -66.932911 | \n",
110 | "
\n",
111 | " \n",
112 | " 4 | \n",
113 | " 00610 | \n",
114 | " 18.295366 | \n",
115 | " -67.125135 | \n",
116 | "
\n",
117 | " \n",
118 | "
\n",
119 | "
"
120 | ],
121 | "text/plain": [
122 | " GEOID INTPTLAT INTPTLONG\n",
123 | "0 00601 18.180555 -66.749961\n",
124 | "1 00602 18.361945 -67.175597\n",
125 | "2 00603 18.455183 -67.119887\n",
126 | "3 00606 18.158345 -66.932911\n",
127 | "4 00610 18.295366 -67.125135"
128 | ]
129 | },
130 | "execution_count": 5,
131 | "metadata": {},
132 | "output_type": "execute_result"
133 | }
134 | ],
135 | "source": [
136 | "url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'\n",
137 | "df_locations = pd.read_csv(url, dtype={'GEOID' : 'str'},sep='\\t', usecols=[0,5,6])\n",
138 | "df_locations.columns = df_locations.columns.str.strip() #some column cleanup\n",
139 | "print (len(df_locations))\n",
140 | "df_locations.head()"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 6,
146 | "metadata": {
147 | "ExecuteTime": {
148 | "end_time": "2018-01-31T03:44:36.964944Z",
149 | "start_time": "2018-01-31T03:44:36.933236Z"
150 | },
151 | "collapsed": true
152 | },
153 | "outputs": [],
154 | "source": [
155 | "kdt = KDTree(df_locations[['INTPTLONG', 'INTPTLAT']])"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 7,
161 | "metadata": {
162 | "ExecuteTime": {
163 | "end_time": "2018-01-31T03:44:37.752155Z",
164 | "start_time": "2018-01-31T03:44:37.742238Z"
165 | }
166 | },
167 | "outputs": [
168 | {
169 | "name": "stdout",
170 | "output_type": "stream",
171 | "text": [
172 | "23609 68132\n",
173 | "Name: GEOID, dtype: object\n"
174 | ]
175 | }
176 | ],
177 | "source": [
178 | "omaha_point_kdt = np.expand_dims(omaha_point, axis=0)\n",
179 | "\n",
180 | "nearest_point_index = kdt.query(omaha_point_kdt, k=1, return_distance=False)\n",
181 | "print(df_locations.loc[nearest_point_index[0], 'GEOID'])"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "## GeoPandas and Shapely"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 8,
194 | "metadata": {
195 | "ExecuteTime": {
196 | "end_time": "2018-01-31T03:44:42.265888Z",
197 | "start_time": "2018-01-31T03:44:42.053058Z"
198 | },
199 | "collapsed": true
200 | },
201 | "outputs": [],
202 | "source": [
203 | "import geopandas as gpd\n",
204 | "from shapely.geometry import Point"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 10,
210 | "metadata": {
211 | "ExecuteTime": {
212 | "end_time": "2018-01-31T03:54:07.124201Z",
213 | "start_time": "2018-01-31T03:53:50.699288Z"
214 | }
215 | },
216 | "outputs": [
217 | {
218 | "name": "stdout",
219 | "output_type": "stream",
220 | "text": [
221 | "33144\n"
222 | ]
223 | },
224 | {
225 | "data": {
226 | "text/html": [
227 | "\n",
228 | "\n",
241 | "
\n",
242 | " \n",
243 | " \n",
244 | " | \n",
245 | " GEOID10 | \n",
246 | " geometry | \n",
247 | "
\n",
248 | " \n",
249 | " \n",
250 | " \n",
251 | " 0 | \n",
252 | " 43451 | \n",
253 | " POLYGON ((-83.674464 41.331119, -83.6744449999... | \n",
254 | "
\n",
255 | " \n",
256 | " 1 | \n",
257 | " 43452 | \n",
258 | " POLYGON ((-83.067745 41.537718, -83.067729 41.... | \n",
259 | "
\n",
260 | " \n",
261 | " 2 | \n",
262 | " 43456 | \n",
263 | " (POLYGON ((-82.8566 41.681222, -82.856831 41.6... | \n",
264 | "
\n",
265 | " \n",
266 | " 3 | \n",
267 | " 43457 | \n",
268 | " POLYGON ((-83.467474 41.268186, -83.4676039999... | \n",
269 | "
\n",
270 | " \n",
271 | " 4 | \n",
272 | " 43458 | \n",
273 | " POLYGON ((-83.222292 41.531025, -83.2222819999... | \n",
274 | "
\n",
275 | " \n",
276 | "
\n",
277 | "
"
278 | ],
279 | "text/plain": [
280 | " GEOID10 geometry\n",
281 | "0 43451 POLYGON ((-83.674464 41.331119, -83.6744449999...\n",
282 | "1 43452 POLYGON ((-83.067745 41.537718, -83.067729 41....\n",
283 | "2 43456 (POLYGON ((-82.8566 41.681222, -82.856831 41.6...\n",
284 | "3 43457 POLYGON ((-83.467474 41.268186, -83.4676039999...\n",
285 | "4 43458 POLYGON ((-83.222292 41.531025, -83.2222819999..."
286 | ]
287 | },
288 | "execution_count": 10,
289 | "metadata": {},
290 | "output_type": "execute_result"
291 | }
292 | ],
293 | "source": [
294 | "#ftp://ftp2.census.gov/geo/tiger/TIGER2016/ZCTA5/tl_2016_us_zcta510.zip\n",
295 | "gdf_locations = gpd.read_file('data/tl_2016_us_zcta510/tl_2016_us_zcta510.shp')\n",
296 | "print(len(gdf_locations))\n",
297 | "gdf_locations[['GEOID10', 'geometry']].head()"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 11,
303 | "metadata": {
304 | "ExecuteTime": {
305 | "end_time": "2018-01-31T03:54:09.626623Z",
306 | "start_time": "2018-01-31T03:54:09.010534Z"
307 | }
308 | },
309 | "outputs": [
310 | {
311 | "name": "stdout",
312 | "output_type": "stream",
313 | "text": [
314 | "24842 68132\n",
315 | "Name: GEOID10, dtype: object\n"
316 | ]
317 | }
318 | ],
319 | "source": [
320 | "omaha_point_shp = Point(omaha_point)\n",
321 | "\n",
322 | "filter = gdf_locations['geometry'].contains(omaha_point_shp)\n",
323 | "print(gdf_locations.loc[filter, 'GEOID10'])"
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | "## Timing"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 12,
336 | "metadata": {
337 | "ExecuteTime": {
338 | "end_time": "2018-01-31T03:54:15.041312Z",
339 | "start_time": "2018-01-31T03:54:10.481233Z"
340 | }
341 | },
342 | "outputs": [
343 | {
344 | "name": "stdout",
345 | "output_type": "stream",
346 | "text": [
347 | "563 µs ± 27.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
348 | ]
349 | }
350 | ],
351 | "source": [
352 | "%%timeit\n",
353 | "nearest_point_index = kdt.query(omaha_point_kdt, k=1, return_distance=False)\n",
354 | "df_locations.loc[nearest_point_index[0], 'GEOID']"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": 13,
360 | "metadata": {
361 | "ExecuteTime": {
362 | "end_time": "2018-01-31T03:54:17.247895Z",
363 | "start_time": "2018-01-31T03:54:15.043292Z"
364 | }
365 | },
366 | "outputs": [
367 | {
368 | "name": "stdout",
369 | "output_type": "stream",
370 | "text": [
371 | "276 ms ± 17.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
372 | ]
373 | }
374 | ],
375 | "source": [
376 | "%%timeit\n",
377 | "filter = gdf_locations['geometry'].contains(omaha_point_shp)\n",
378 | "gdf_locations.loc[filter, 'GEOID10']"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {
385 | "collapsed": true
386 | },
387 | "outputs": [],
388 | "source": []
389 | }
390 | ],
391 | "metadata": {
392 | "kernelspec": {
393 | "display_name": "Python 3",
394 | "language": "python",
395 | "name": "python3"
396 | },
397 | "language_info": {
398 | "codemirror_mode": {
399 | "name": "ipython",
400 | "version": 3
401 | },
402 | "file_extension": ".py",
403 | "mimetype": "text/x-python",
404 | "name": "python",
405 | "nbconvert_exporter": "python",
406 | "pygments_lexer": "ipython3",
407 | "version": "3.5.4"
408 | }
409 | },
410 | "nbformat": 4,
411 | "nbformat_minor": 2
412 | }
413 |
--------------------------------------------------------------------------------